0001 /*
0002  * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
0003  *
0004  * This software is available to you under a choice of one of two
0005  * licenses.  You may choose to be licensed under the terms of the GNU
0006  * General Public License (GPL) Version 2, available from the file
0007  * COPYING in the main directory of this source tree, or the
0008  * OpenIB.org BSD license below:
0009  *
0010  *     Redistribution and use in source and binary forms, with or
0011  *     without modification, are permitted provided that the following
0012  *     conditions are met:
0013  *
0014  *      - Redistributions of source code must retain the above
0015  *    copyright notice, this list of conditions and the following
0016  *    disclaimer.
0017  *
0018  *      - Redistributions in binary form must reproduce the above
0019  *    copyright notice, this list of conditions and the following
0020  *    disclaimer in the documentation and/or other materials
0021  *    provided with the distribution.
0022  *
0023  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0024  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0025  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0026  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0027  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0028  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0029  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0030  * SOFTWARE.
0031  */
0032 #include <linux/module.h>
0033 #include <linux/list.h>
0034 #include <linux/workqueue.h>
0035 #include <linux/skbuff.h>
0036 #include <linux/timer.h>
0037 #include <linux/notifier.h>
0038 #include <linux/inetdevice.h>
0039 #include <linux/ip.h>
0040 #include <linux/tcp.h>
0041 #include <linux/if_vlan.h>
0042 
0043 #include <net/neighbour.h>
0044 #include <net/netevent.h>
0045 #include <net/route.h>
0046 #include <net/tcp.h>
0047 #include <net/ip6_route.h>
0048 #include <net/addrconf.h>
0049 
0050 #include <rdma/ib_addr.h>
0051 
0052 #include <libcxgb_cm.h>
0053 #include "iw_cxgb4.h"
0054 #include "clip_tbl.h"
0055 
0056 static char *states[] = {
0057     "idle",
0058     "listen",
0059     "connecting",
0060     "mpa_wait_req",
0061     "mpa_req_sent",
0062     "mpa_req_rcvd",
0063     "mpa_rep_sent",
0064     "fpdu_mode",
0065     "aborting",
0066     "closing",
0067     "moribund",
0068     "dead",
0069     NULL,
0070 };
0071 
0072 static int nocong;
0073 module_param(nocong, int, 0644);
0074 MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
0075 
0076 static int enable_ecn;
0077 module_param(enable_ecn, int, 0644);
0078 MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
0079 
0080 static int dack_mode;
0081 module_param(dack_mode, int, 0644);
0082 MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
0083 
0084 uint c4iw_max_read_depth = 32;
0085 module_param(c4iw_max_read_depth, int, 0644);
0086 MODULE_PARM_DESC(c4iw_max_read_depth,
0087          "Per-connection max ORD/IRD (default=32)");
0088 
0089 static int enable_tcp_timestamps;
0090 module_param(enable_tcp_timestamps, int, 0644);
0091 MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)");
0092 
0093 static int enable_tcp_sack;
0094 module_param(enable_tcp_sack, int, 0644);
0095 MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)");
0096 
0097 static int enable_tcp_window_scaling = 1;
0098 module_param(enable_tcp_window_scaling, int, 0644);
0099 MODULE_PARM_DESC(enable_tcp_window_scaling,
0100          "Enable tcp window scaling (default=1)");
0101 
0102 static int peer2peer = 1;
0103 module_param(peer2peer, int, 0644);
0104 MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
0105 
0106 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
0107 module_param(p2p_type, int, 0644);
0108 MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
0109                "1=RDMA_READ 0=RDMA_WRITE (default 1)");
0110 
0111 static int ep_timeout_secs = 60;
0112 module_param(ep_timeout_secs, int, 0644);
0113 MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
0114                    "in seconds (default=60)");
0115 
0116 static int mpa_rev = 2;
0117 module_param(mpa_rev, int, 0644);
0118 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
0119         "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
0120         " compliant (default=2)");
0121 
0122 static int markers_enabled;
0123 module_param(markers_enabled, int, 0644);
0124 MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
0125 
0126 static int crc_enabled = 1;
0127 module_param(crc_enabled, int, 0644);
0128 MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
0129 
0130 static int rcv_win = 256 * 1024;
0131 module_param(rcv_win, int, 0644);
0132 MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
0133 
0134 static int snd_win = 128 * 1024;
0135 module_param(snd_win, int, 0644);
0136 MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
0137 
0138 static struct workqueue_struct *workq;
0139 
0140 static struct sk_buff_head rxq;
0141 
0142 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
0143 static void ep_timeout(struct timer_list *t);
0144 static void connect_reply_upcall(struct c4iw_ep *ep, int status);
0145 static int sched(struct c4iw_dev *dev, struct sk_buff *skb);
0146 
0147 static LIST_HEAD(timeout_list);
0148 static DEFINE_SPINLOCK(timeout_lock);
0149 
0150 static void deref_cm_id(struct c4iw_ep_common *epc)
0151 {
0152     epc->cm_id->rem_ref(epc->cm_id);
0153     epc->cm_id = NULL;
0154     set_bit(CM_ID_DEREFED, &epc->history);
0155 }
0156 
0157 static void ref_cm_id(struct c4iw_ep_common *epc)
0158 {
0159     set_bit(CM_ID_REFED, &epc->history);
0160     epc->cm_id->add_ref(epc->cm_id);
0161 }
0162 
0163 static void deref_qp(struct c4iw_ep *ep)
0164 {
0165     c4iw_qp_rem_ref(&ep->com.qp->ibqp);
0166     clear_bit(QP_REFERENCED, &ep->com.flags);
0167     set_bit(QP_DEREFED, &ep->com.history);
0168 }
0169 
0170 static void ref_qp(struct c4iw_ep *ep)
0171 {
0172     set_bit(QP_REFERENCED, &ep->com.flags);
0173     set_bit(QP_REFED, &ep->com.history);
0174     c4iw_qp_add_ref(&ep->com.qp->ibqp);
0175 }
0176 
0177 static void start_ep_timer(struct c4iw_ep *ep)
0178 {
0179     pr_debug("ep %p\n", ep);
0180     if (timer_pending(&ep->timer)) {
0181         pr_err("%s timer already started! ep %p\n",
0182                __func__, ep);
0183         return;
0184     }
0185     clear_bit(TIMEOUT, &ep->com.flags);
0186     c4iw_get_ep(&ep->com);
0187     ep->timer.expires = jiffies + ep_timeout_secs * HZ;
0188     add_timer(&ep->timer);
0189 }
0190 
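/*
 * Stop the endpoint timer.  Returns 0 if this call stopped the timer and
 * dropped the timer's reference on the ep; returns 1 if the TIMEOUT flag
 * was already set, i.e. the timer had already fired.
 */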
0191 static int stop_ep_timer(struct c4iw_ep *ep)
0192 {
0193     pr_debug("ep %p stopping\n", ep);
0194     del_timer_sync(&ep->timer);
0195     if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
0196         c4iw_put_ep(&ep->com);
0197         return 0;
0198     }
0199     return 1;
0200 }
0201 
0202 static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
0203           struct l2t_entry *l2e)
0204 {
0205     int error = 0;
0206 
0207     if (c4iw_fatal_error(rdev)) {
0208         kfree_skb(skb);
0209         pr_err("%s - device in error state - dropping\n", __func__);
0210         return -EIO;
0211     }
0212     error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
0213     if (error < 0)
0214         kfree_skb(skb);
0215     else if (error == NET_XMIT_DROP)
0216         return -ENOMEM;
0217     return error < 0 ? error : 0;
0218 }
0219 
0220 int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
0221 {
0222     int error = 0;
0223 
0224     if (c4iw_fatal_error(rdev)) {
0225         kfree_skb(skb);
0226         pr_err("%s - device in error state - dropping\n", __func__);
0227         return -EIO;
0228     }
0229     error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
0230     if (error < 0)
0231         kfree_skb(skb);
0232     return error < 0 ? error : 0;
0233 }
0234 
0235 static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
0236 {
0237     u32 len = roundup(sizeof(struct cpl_tid_release), 16);
0238 
0239     skb = get_skb(skb, len, GFP_KERNEL);
0240     if (!skb)
0241         return;
0242 
0243     cxgb_mk_tid_release(skb, len, hwtid, 0);
0244     c4iw_ofld_send(rdev, skb);
0245     return;
0246 }
0247 
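/*
 * Derive the effective MSS for this connection from the negotiated MTU
 * index: subtract the IPv4/IPv6 and TCP header sizes, account for the TCP
 * timestamp option if negotiated, and clamp to a 128-byte floor.
 */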
0248 static void set_emss(struct c4iw_ep *ep, u16 opt)
0249 {
0250     ep->emss = ep->com.dev->rdev.lldi.mtus[TCPOPT_MSS_G(opt)] -
0251            ((AF_INET == ep->com.remote_addr.ss_family) ?
0252             sizeof(struct iphdr) : sizeof(struct ipv6hdr)) -
0253            sizeof(struct tcphdr);
0254     ep->mss = ep->emss;
0255     if (TCPOPT_TSTAMP_G(opt))
0256         ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4);
0257     if (ep->emss < 128)
0258         ep->emss = 128;
0259     if (ep->emss & 7)
0260         pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
0261              TCPOPT_MSS_G(opt), ep->mss, ep->emss);
0262     pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss,
0263          ep->emss);
0264 }
0265 
0266 static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
0267 {
0268     enum c4iw_ep_state state;
0269 
0270     mutex_lock(&epc->mutex);
0271     state = epc->state;
0272     mutex_unlock(&epc->mutex);
0273     return state;
0274 }
0275 
0276 static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
0277 {
0278     epc->state = new;
0279 }
0280 
0281 static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
0282 {
0283     mutex_lock(&epc->mutex);
0284     pr_debug("%s -> %s\n", states[epc->state], states[new]);
0285     __state_set(epc, new);
0286     mutex_unlock(&epc->mutex);
0287     return;
0288 }
0289 
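/*
 * Pre-allocate the skbs this endpoint will need for control work requests
 * (FLOWC, half-close, abort) so those paths do not depend on memory
 * allocation at send time.
 */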
0290 static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size)
0291 {
0292     struct sk_buff *skb;
0293     unsigned int i;
0294     size_t len;
0295 
0296     len = roundup(sizeof(union cpl_wr_size), 16);
0297     for (i = 0; i < size; i++) {
0298         skb = alloc_skb(len, GFP_KERNEL);
0299         if (!skb)
0300             goto fail;
0301         skb_queue_tail(ep_skb_list, skb);
0302     }
0303     return 0;
0304 fail:
0305     skb_queue_purge(ep_skb_list);
0306     return -ENOMEM;
0307 }
0308 
0309 static void *alloc_ep(int size, gfp_t gfp)
0310 {
0311     struct c4iw_ep_common *epc;
0312 
0313     epc = kzalloc(size, gfp);
0314     if (epc) {
0315         epc->wr_waitp = c4iw_alloc_wr_wait(gfp);
0316         if (!epc->wr_waitp) {
0317             kfree(epc);
0318             epc = NULL;
0319             goto out;
0320         }
0321         kref_init(&epc->kref);
0322         mutex_init(&epc->mutex);
0323         c4iw_init_wr_wait(epc->wr_waitp);
0324     }
0325     pr_debug("alloc ep %p\n", epc);
0326 out:
0327     return epc;
0328 }
0329 
0330 static void remove_ep_tid(struct c4iw_ep *ep)
0331 {
0332     unsigned long flags;
0333 
0334     xa_lock_irqsave(&ep->com.dev->hwtids, flags);
0335     __xa_erase(&ep->com.dev->hwtids, ep->hwtid);
0336     if (xa_empty(&ep->com.dev->hwtids))
0337         wake_up(&ep->com.dev->wait);
0338     xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
0339 }
0340 
0341 static int insert_ep_tid(struct c4iw_ep *ep)
0342 {
0343     unsigned long flags;
0344     int err;
0345 
0346     xa_lock_irqsave(&ep->com.dev->hwtids, flags);
0347     err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL);
0348     xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
0349 
0350     return err;
0351 }
0352 
0353 /*
0354  * Atomically lookup the ep ptr given the tid and grab a reference on the ep.
0355  */
0356 static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid)
0357 {
0358     struct c4iw_ep *ep;
0359     unsigned long flags;
0360 
0361     xa_lock_irqsave(&dev->hwtids, flags);
0362     ep = xa_load(&dev->hwtids, tid);
0363     if (ep)
0364         c4iw_get_ep(&ep->com);
0365     xa_unlock_irqrestore(&dev->hwtids, flags);
0366     return ep;
0367 }
0368 
0369 /*
0370  * Atomically lookup the ep ptr given the stid and grab a reference on the ep.
0371  */
0372 static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev,
0373                            unsigned int stid)
0374 {
0375     struct c4iw_listen_ep *ep;
0376     unsigned long flags;
0377 
0378     xa_lock_irqsave(&dev->stids, flags);
0379     ep = xa_load(&dev->stids, stid);
0380     if (ep)
0381         c4iw_get_ep(&ep->com);
0382     xa_unlock_irqrestore(&dev->stids, flags);
0383     return ep;
0384 }
0385 
0386 void _c4iw_free_ep(struct kref *kref)
0387 {
0388     struct c4iw_ep *ep;
0389 
0390     ep = container_of(kref, struct c4iw_ep, com.kref);
0391     pr_debug("ep %p state %s\n", ep, states[ep->com.state]);
0392     if (test_bit(QP_REFERENCED, &ep->com.flags))
0393         deref_qp(ep);
0394     if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
0395         if (ep->com.remote_addr.ss_family == AF_INET6) {
0396             struct sockaddr_in6 *sin6 =
0397                     (struct sockaddr_in6 *)
0398                     &ep->com.local_addr;
0399 
0400             cxgb4_clip_release(
0401                     ep->com.dev->rdev.lldi.ports[0],
0402                     (const u32 *)&sin6->sin6_addr.s6_addr,
0403                     1);
0404         }
0405         cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
0406                  ep->com.local_addr.ss_family);
0407         dst_release(ep->dst);
0408         cxgb4_l2t_release(ep->l2t);
0409         kfree_skb(ep->mpa_skb);
0410     }
0411     if (!skb_queue_empty(&ep->com.ep_skb_list))
0412         skb_queue_purge(&ep->com.ep_skb_list);
0413     c4iw_put_wr_wait(ep->com.wr_waitp);
0414     kfree(ep);
0415 }
0416 
0417 static void release_ep_resources(struct c4iw_ep *ep)
0418 {
0419     set_bit(RELEASE_RESOURCES, &ep->com.flags);
0420 
0421     /*
0422      * If we have a hwtid, then remove it from the hwtid xarray
0423      * so lookups will no longer find this endpoint.  Otherwise
0424      * we have a race where one thread finds the ep ptr just
0425      * before the other thread is freeing the ep memory.
0426      */
0427     if (ep->hwtid != -1)
0428         remove_ep_tid(ep);
0429     c4iw_put_ep(&ep->com);
0430 }
0431 
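/* Map a CPL hardware status code to a negative errno. */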
0432 static int status2errno(int status)
0433 {
0434     switch (status) {
0435     case CPL_ERR_NONE:
0436         return 0;
0437     case CPL_ERR_CONN_RESET:
0438         return -ECONNRESET;
0439     case CPL_ERR_ARP_MISS:
0440         return -EHOSTUNREACH;
0441     case CPL_ERR_CONN_TIMEDOUT:
0442         return -ETIMEDOUT;
0443     case CPL_ERR_TCAM_FULL:
0444         return -ENOMEM;
0445     case CPL_ERR_CONN_EXIST:
0446         return -EADDRINUSE;
0447     default:
0448         return -EIO;
0449     }
0450 }
0451 
0452 /*
0453  * Try to reuse skbs already allocated...
0454  */
0455 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
0456 {
0457     if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
0458         skb_trim(skb, 0);
0459         skb_get(skb);
0460         skb_reset_transport_header(skb);
0461     } else {
0462         skb = alloc_skb(len, gfp);
0463         if (!skb)
0464             return NULL;
0465     }
0466     t4_set_arp_err_handler(skb, NULL, NULL);
0467     return skb;
0468 }
0469 
0470 static struct net_device *get_real_dev(struct net_device *egress_dev)
0471 {
0472     return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
0473 }
0474 
0475 static void arp_failure_discard(void *handle, struct sk_buff *skb)
0476 {
0477     pr_err("ARP failure\n");
0478     kfree_skb(skb);
0479 }
0480 
0481 static void mpa_start_arp_failure(void *handle, struct sk_buff *skb)
0482 {
0483     pr_err("ARP failure during MPA Negotiation - Closing Connection\n");
0484 }
0485 
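/*
 * Fake CPL opcodes, numbered just past the real CPL command range, used to
 * route deferred endpoint cleanup through sched()/process_work().
 */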
0486 enum {
0487     NUM_FAKE_CPLS = 2,
0488     FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0,
0489     FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1,
0490 };
0491 
0492 static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
0493 {
0494     struct c4iw_ep *ep;
0495 
0496     ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
0497     release_ep_resources(ep);
0498     return 0;
0499 }
0500 
0501 static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
0502 {
0503     struct c4iw_ep *ep;
0504 
0505     ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
0506     c4iw_put_ep(&ep->parent_ep->com);
0507     release_ep_resources(ep);
0508     return 0;
0509 }
0510 
0511 /*
0512  * Fake up a special CPL opcode and call sched() so process_work() will call
0513  * _put_ep_safe() in a safe context to free the ep resources.  This is needed
0514  * because ARP error handlers are called in an ATOMIC context, and
0515  * _c4iw_free_ep() needs to block.
0516  */
0517 static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb,
0518                   int cpl)
0519 {
0520     struct cpl_act_establish *rpl = cplhdr(skb);
0521 
0522     /* Set our special ARP_FAILURE opcode */
0523     rpl->ot.opcode = cpl;
0524 
0525     /*
0526      * Save ep in the skb->cb area, after where sched() will save the dev
0527      * ptr.
0528      */
0529     *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep;
0530     sched(ep->com.dev, skb);
0531 }
0532 
0533 /* Handle an ARP failure for an accept */
0534 static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
0535 {
0536     struct c4iw_ep *ep = handle;
0537 
0538     pr_err("ARP failure during accept - tid %u - dropping connection\n",
0539            ep->hwtid);
0540 
0541     __state_set(&ep->com, DEAD);
0542     queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE);
0543 }
0544 
0545 /*
0546  * Handle an ARP failure for an active open.
0547  */
0548 static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
0549 {
0550     struct c4iw_ep *ep = handle;
0551 
0552     pr_err("ARP failure during connect\n");
0553     connect_reply_upcall(ep, -EHOSTUNREACH);
0554     __state_set(&ep->com, DEAD);
0555     if (ep->com.remote_addr.ss_family == AF_INET6) {
0556         struct sockaddr_in6 *sin6 =
0557             (struct sockaddr_in6 *)&ep->com.local_addr;
0558         cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
0559                    (const u32 *)&sin6->sin6_addr.s6_addr, 1);
0560     }
0561     xa_erase_irq(&ep->com.dev->atids, ep->atid);
0562     cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
0563     queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
0564 }
0565 
0566 /*
0567  * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
0568  * and send it along.
0569  */
0570 static void abort_arp_failure(void *handle, struct sk_buff *skb)
0571 {
0572     int ret;
0573     struct c4iw_ep *ep = handle;
0574     struct c4iw_rdev *rdev = &ep->com.dev->rdev;
0575     struct cpl_abort_req *req = cplhdr(skb);
0576 
0577     pr_debug("rdev %p\n", rdev);
0578     req->cmd = CPL_ABORT_NO_RST;
0579     skb_get(skb);
0580     ret = c4iw_ofld_send(rdev, skb);
0581     if (ret) {
0582         __state_set(&ep->com, DEAD);
0583         queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
0584     } else
0585         kfree_skb(skb);
0586 }
0587 
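/*
 * Send a FW_FLOWC_WR that primes the firmware with this connection's
 * per-flow state (channel, ingress queue, sequence numbers, send buffer,
 * MSS and window scale).
 */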
0588 static int send_flowc(struct c4iw_ep *ep)
0589 {
0590     struct fw_flowc_wr *flowc;
0591     struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
0592     u16 vlan = ep->l2t->vlan;
0593     int nparams;
0594     int flowclen, flowclen16;
0595 
0596     if (WARN_ON(!skb))
0597         return -ENOMEM;
0598 
0599     if (vlan == CPL_L2T_VLAN_NONE)
0600         nparams = 9;
0601     else
0602         nparams = 10;
0603 
0604     flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
0605     flowclen16 = DIV_ROUND_UP(flowclen, 16);
0606     flowclen = flowclen16 * 16;
0607 
0608     flowc = __skb_put(skb, flowclen);
0609     memset(flowc, 0, flowclen);
0610 
0611     flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
0612                        FW_FLOWC_WR_NPARAMS_V(nparams));
0613     flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
0614                       FW_WR_FLOWID_V(ep->hwtid));
0615 
0616     flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
0617     flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
0618                         (ep->com.dev->rdev.lldi.pf));
0619     flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
0620     flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
0621     flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
0622     flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
0623     flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
0624     flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
0625     flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
0626     flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
0627     flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
0628     flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
0629     flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
0630     flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
0631     flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
0632     flowc->mnemval[7].val = cpu_to_be32(ep->emss);
0633     flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE;
0634     flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale);
0635     if (nparams == 10) {
0636         u16 pri;
0637         pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
0638         flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
0639         flowc->mnemval[9].val = cpu_to_be32(pri);
0640     }
0641 
0642     set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
0643     return c4iw_ofld_send(&ep->com.dev->rdev, skb);
0644 }
0645 
0646 static int send_halfclose(struct c4iw_ep *ep)
0647 {
0648     struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
0649     u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
0650 
0651     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
0652     if (WARN_ON(!skb))
0653         return -ENOMEM;
0654 
0655     cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
0656                   NULL, arp_failure_discard);
0657 
0658     return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
0659 }
0660 
0661 static void read_tcb(struct c4iw_ep *ep)
0662 {
0663     struct sk_buff *skb;
0664     struct cpl_get_tcb *req;
0665     int wrlen = roundup(sizeof(*req), 16);
0666 
0667     skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
0668     if (WARN_ON(!skb))
0669         return;
0670 
0671     set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
0672     req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
0673     memset(req, 0, wrlen);
0674     INIT_TP_WR(req, ep->hwtid);
0675     OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
0676     req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
0677 
0678     /*
0679      * keep a ref on the ep so the tcb is not unlocked before this
0680      * cpl completes. The ref is released in read_tcb_rpl().
0681      */
0682     c4iw_get_ep(&ep->com);
0683     if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
0684         c4iw_put_ep(&ep->com);
0685 }
0686 
0687 static int send_abort_req(struct c4iw_ep *ep)
0688 {
0689     u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
0690     struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
0691 
0692     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
0693     if (WARN_ON(!req_skb))
0694         return -ENOMEM;
0695 
0696     cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
0697               ep, abort_arp_failure);
0698 
0699     return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
0700 }
0701 
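/*
 * If the QP is not backed by an SRQ, issue the abort request immediately.
 * For SRQ-backed QPs, read the hardware TCB first; the ABORT_REQ_IN_PROGRESS
 * flag defers the actual abort until the TCB read reply is handled.
 */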
0702 static int send_abort(struct c4iw_ep *ep)
0703 {
0704     if (!ep->com.qp || !ep->com.qp->srq) {
0705         send_abort_req(ep);
0706         return 0;
0707     }
0708     set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
0709     read_tcb(ep);
0710     return 0;
0711 }
0712 
0713 static int send_connect(struct c4iw_ep *ep)
0714 {
0715     struct cpl_act_open_req *req = NULL;
0716     struct cpl_t5_act_open_req *t5req = NULL;
0717     struct cpl_t6_act_open_req *t6req = NULL;
0718     struct cpl_act_open_req6 *req6 = NULL;
0719     struct cpl_t5_act_open_req6 *t5req6 = NULL;
0720     struct cpl_t6_act_open_req6 *t6req6 = NULL;
0721     struct sk_buff *skb;
0722     u64 opt0;
0723     u32 opt2;
0724     unsigned int mtu_idx;
0725     u32 wscale;
0726     int win, sizev4, sizev6, wrlen;
0727     struct sockaddr_in *la = (struct sockaddr_in *)
0728                  &ep->com.local_addr;
0729     struct sockaddr_in *ra = (struct sockaddr_in *)
0730                  &ep->com.remote_addr;
0731     struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
0732                    &ep->com.local_addr;
0733     struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
0734                    &ep->com.remote_addr;
0735     int ret;
0736     enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
0737     u32 isn = (prandom_u32() & ~7UL) - 1;
0738     struct net_device *netdev;
0739     u64 params;
0740 
0741     netdev = ep->com.dev->rdev.lldi.ports[0];
0742 
0743     switch (CHELSIO_CHIP_VERSION(adapter_type)) {
0744     case CHELSIO_T4:
0745         sizev4 = sizeof(struct cpl_act_open_req);
0746         sizev6 = sizeof(struct cpl_act_open_req6);
0747         break;
0748     case CHELSIO_T5:
0749         sizev4 = sizeof(struct cpl_t5_act_open_req);
0750         sizev6 = sizeof(struct cpl_t5_act_open_req6);
0751         break;
0752     case CHELSIO_T6:
0753         sizev4 = sizeof(struct cpl_t6_act_open_req);
0754         sizev6 = sizeof(struct cpl_t6_act_open_req6);
0755         break;
0756     default:
0757         pr_err("T%d Chip is not supported\n",
0758                CHELSIO_CHIP_VERSION(adapter_type));
0759         return -EINVAL;
0760     }
0761 
0762     wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
0763             roundup(sizev4, 16) :
0764             roundup(sizev6, 16);
0765 
0766     pr_debug("ep %p atid %u\n", ep, ep->atid);
0767 
0768     skb = get_skb(NULL, wrlen, GFP_KERNEL);
0769     if (!skb) {
0770         pr_err("%s - failed to alloc skb\n", __func__);
0771         return -ENOMEM;
0772     }
0773     set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
0774 
0775     cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
0776               enable_tcp_timestamps,
0777               (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
0778     wscale = cxgb_compute_wscale(rcv_win);
0779 
0780     /*
0781      * Specify the largest window that will fit in opt0. The
0782      * remainder will be specified in the rx_data_ack.
0783      */
0784     win = ep->rcv_win >> 10;
0785     if (win > RCV_BUFSIZ_M)
0786         win = RCV_BUFSIZ_M;
0787 
0788     opt0 = (nocong ? NO_CONG_F : 0) |
0789            KEEP_ALIVE_F |
0790            DELACK_F |
0791            WND_SCALE_V(wscale) |
0792            MSS_IDX_V(mtu_idx) |
0793            L2T_IDX_V(ep->l2t->idx) |
0794            TX_CHAN_V(ep->tx_chan) |
0795            SMAC_SEL_V(ep->smac_idx) |
0796            DSCP_V(ep->tos >> 2) |
0797            ULP_MODE_V(ULP_MODE_TCPDDP) |
0798            RCV_BUFSIZ_V(win);
0799     opt2 = RX_CHANNEL_V(0) |
0800            CCTRL_ECN_V(enable_ecn) |
0801            RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
0802     if (enable_tcp_timestamps)
0803         opt2 |= TSTAMPS_EN_F;
0804     if (enable_tcp_sack)
0805         opt2 |= SACK_EN_F;
0806     if (wscale && enable_tcp_window_scaling)
0807         opt2 |= WND_SCALE_EN_F;
0808     if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
0809         if (peer2peer)
0810             isn += 4;
0811 
0812         opt2 |= T5_OPT_2_VALID_F;
0813         opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
0814         opt2 |= T5_ISS_F;
0815     }
0816 
0817     params = cxgb4_select_ntuple(netdev, ep->l2t);
0818 
0819     if (ep->com.remote_addr.ss_family == AF_INET6)
0820         cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
0821                    (const u32 *)&la6->sin6_addr.s6_addr, 1);
0822 
0823     t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
0824 
0825     if (ep->com.remote_addr.ss_family == AF_INET) {
0826         switch (CHELSIO_CHIP_VERSION(adapter_type)) {
0827         case CHELSIO_T4:
0828             req = skb_put(skb, wrlen);
0829             INIT_TP_WR(req, 0);
0830             break;
0831         case CHELSIO_T5:
0832             t5req = skb_put(skb, wrlen);
0833             INIT_TP_WR(t5req, 0);
0834             req = (struct cpl_act_open_req *)t5req;
0835             break;
0836         case CHELSIO_T6:
0837             t6req = skb_put(skb, wrlen);
0838             INIT_TP_WR(t6req, 0);
0839             req = (struct cpl_act_open_req *)t6req;
0840             t5req = (struct cpl_t5_act_open_req *)t6req;
0841             break;
0842         default:
0843             pr_err("T%d Chip is not supported\n",
0844                    CHELSIO_CHIP_VERSION(adapter_type));
0845             ret = -EINVAL;
0846             goto clip_release;
0847         }
0848 
0849         OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
0850                     ((ep->rss_qid<<14) | ep->atid)));
0851         req->local_port = la->sin_port;
0852         req->peer_port = ra->sin_port;
0853         req->local_ip = la->sin_addr.s_addr;
0854         req->peer_ip = ra->sin_addr.s_addr;
0855         req->opt0 = cpu_to_be64(opt0);
0856 
0857         if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
0858             req->params = cpu_to_be32(params);
0859             req->opt2 = cpu_to_be32(opt2);
0860         } else {
0861             if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
0862                 t5req->params =
0863                       cpu_to_be64(FILTER_TUPLE_V(params));
0864                 t5req->rsvd = cpu_to_be32(isn);
0865                 pr_debug("snd_isn %u\n", t5req->rsvd);
0866                 t5req->opt2 = cpu_to_be32(opt2);
0867             } else {
0868                 t6req->params =
0869                       cpu_to_be64(FILTER_TUPLE_V(params));
0870                 t6req->rsvd = cpu_to_be32(isn);
0871                 pr_debug("snd_isn %u\n", t6req->rsvd);
0872                 t6req->opt2 = cpu_to_be32(opt2);
0873             }
0874         }
0875     } else {
0876         switch (CHELSIO_CHIP_VERSION(adapter_type)) {
0877         case CHELSIO_T4:
0878             req6 = skb_put(skb, wrlen);
0879             INIT_TP_WR(req6, 0);
0880             break;
0881         case CHELSIO_T5:
0882             t5req6 = skb_put(skb, wrlen);
0883             INIT_TP_WR(t5req6, 0);
0884             req6 = (struct cpl_act_open_req6 *)t5req6;
0885             break;
0886         case CHELSIO_T6:
0887             t6req6 = skb_put(skb, wrlen);
0888             INIT_TP_WR(t6req6, 0);
0889             req6 = (struct cpl_act_open_req6 *)t6req6;
0890             t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
0891             break;
0892         default:
0893             pr_err("T%d Chip is not supported\n",
0894                    CHELSIO_CHIP_VERSION(adapter_type));
0895             ret = -EINVAL;
0896             goto clip_release;
0897         }
0898 
0899         OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
0900                     ((ep->rss_qid<<14)|ep->atid)));
0901         req6->local_port = la6->sin6_port;
0902         req6->peer_port = ra6->sin6_port;
0903         req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr));
0904         req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8));
0905         req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr));
0906         req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8));
0907         req6->opt0 = cpu_to_be64(opt0);
0908 
0909         if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
0910             req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev,
0911                                       ep->l2t));
0912             req6->opt2 = cpu_to_be32(opt2);
0913         } else {
0914             if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
0915                 t5req6->params =
0916                         cpu_to_be64(FILTER_TUPLE_V(params));
0917                 t5req6->rsvd = cpu_to_be32(isn);
0918                 pr_debug("snd_isn %u\n", t5req6->rsvd);
0919                 t5req6->opt2 = cpu_to_be32(opt2);
0920             } else {
0921                 t6req6->params =
0922                         cpu_to_be64(FILTER_TUPLE_V(params));
0923                 t6req6->rsvd = cpu_to_be32(isn);
0924                 pr_debug("snd_isn %u\n", t6req6->rsvd);
0925                 t6req6->opt2 = cpu_to_be32(opt2);
0926             }
0927 
0928         }
0929     }
0930 
0931     set_bit(ACT_OPEN_REQ, &ep->com.history);
0932     ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
0933 clip_release:
0934     if (ret && ep->com.remote_addr.ss_family == AF_INET6)
0935         cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
0936                    (const u32 *)&la6->sin6_addr.s6_addr, 1);
0937     return ret;
0938 }
0939 
0940 static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
0941             u8 mpa_rev_to_use)
0942 {
0943     int mpalen, wrlen, ret;
0944     struct fw_ofld_tx_data_wr *req;
0945     struct mpa_message *mpa;
0946     struct mpa_v2_conn_params mpa_v2_params;
0947 
0948     pr_debug("ep %p tid %u pd_len %d\n",
0949          ep, ep->hwtid, ep->plen);
0950 
0951     mpalen = sizeof(*mpa) + ep->plen;
0952     if (mpa_rev_to_use == 2)
0953         mpalen += sizeof(struct mpa_v2_conn_params);
0954     wrlen = roundup(mpalen + sizeof(*req), 16);
0955     skb = get_skb(skb, wrlen, GFP_KERNEL);
0956     if (!skb) {
0957         connect_reply_upcall(ep, -ENOMEM);
0958         return -ENOMEM;
0959     }
0960     set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
0961 
0962     req = skb_put_zero(skb, wrlen);
0963     req->op_to_immdlen = cpu_to_be32(
0964         FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
0965         FW_WR_COMPL_F |
0966         FW_WR_IMMDLEN_V(mpalen));
0967     req->flowid_len16 = cpu_to_be32(
0968         FW_WR_FLOWID_V(ep->hwtid) |
0969         FW_WR_LEN16_V(wrlen >> 4));
0970     req->plen = cpu_to_be32(mpalen);
0971     req->tunnel_to_proxy = cpu_to_be32(
0972         FW_OFLD_TX_DATA_WR_FLUSH_F |
0973         FW_OFLD_TX_DATA_WR_SHOVE_F);
0974 
0975     mpa = (struct mpa_message *)(req + 1);
0976     memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
0977 
0978     mpa->flags = 0;
0979     if (crc_enabled)
0980         mpa->flags |= MPA_CRC;
0981     if (markers_enabled) {
0982         mpa->flags |= MPA_MARKERS;
0983         ep->mpa_attr.recv_marker_enabled = 1;
0984     } else {
0985         ep->mpa_attr.recv_marker_enabled = 0;
0986     }
0987     if (mpa_rev_to_use == 2)
0988         mpa->flags |= MPA_ENHANCED_RDMA_CONN;
0989 
0990     mpa->private_data_size = htons(ep->plen);
0991     mpa->revision = mpa_rev_to_use;
0992     if (mpa_rev_to_use == 1) {
0993         ep->tried_with_mpa_v1 = 1;
0994         ep->retry_with_mpa_v1 = 0;
0995     }
0996 
0997     if (mpa_rev_to_use == 2) {
0998         mpa->private_data_size =
0999             htons(ntohs(mpa->private_data_size) +
1000                   sizeof(struct mpa_v2_conn_params));
1001         pr_debug("initiator ird %u ord %u\n", ep->ird,
1002              ep->ord);
1003         mpa_v2_params.ird = htons((u16)ep->ird);
1004         mpa_v2_params.ord = htons((u16)ep->ord);
1005 
1006         if (peer2peer) {
1007             mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1008             if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1009                 mpa_v2_params.ord |=
1010                     htons(MPA_V2_RDMA_WRITE_RTR);
1011             else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1012                 mpa_v2_params.ord |=
1013                     htons(MPA_V2_RDMA_READ_RTR);
1014         }
1015         memcpy(mpa->private_data, &mpa_v2_params,
1016                sizeof(struct mpa_v2_conn_params));
1017 
1018         if (ep->plen)
1019             memcpy(mpa->private_data +
1020                    sizeof(struct mpa_v2_conn_params),
1021                    ep->mpa_pkt + sizeof(*mpa), ep->plen);
1022     } else
1023         if (ep->plen)
1024             memcpy(mpa->private_data,
1025                     ep->mpa_pkt + sizeof(*mpa), ep->plen);
1026 
1027     /*
1028      * Reference the mpa skb.  This ensures the data area
1029      * will remain in memory until the hw acks the tx.
1030      * Function fw4_ack() will deref it.
1031      */
1032     skb_get(skb);
1033     t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
1034     ep->mpa_skb = skb;
1035     ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1036     if (ret)
1037         return ret;
1038     start_ep_timer(ep);
1039     __state_set(&ep->com, MPA_REQ_SENT);
1040     ep->mpa_attr.initiator = 1;
1041     ep->snd_seq += mpalen;
1042     return ret;
1043 }
1044 
1045 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1046 {
1047     int mpalen, wrlen;
1048     struct fw_ofld_tx_data_wr *req;
1049     struct mpa_message *mpa;
1050     struct sk_buff *skb;
1051     struct mpa_v2_conn_params mpa_v2_params;
1052 
1053     pr_debug("ep %p tid %u pd_len %d\n",
1054          ep, ep->hwtid, ep->plen);
1055 
1056     mpalen = sizeof(*mpa) + plen;
1057     if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1058         mpalen += sizeof(struct mpa_v2_conn_params);
1059     wrlen = roundup(mpalen + sizeof(*req), 16);
1060 
1061     skb = get_skb(NULL, wrlen, GFP_KERNEL);
1062     if (!skb) {
1063         pr_err("%s - cannot alloc skb!\n", __func__);
1064         return -ENOMEM;
1065     }
1066     set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1067 
1068     req = skb_put_zero(skb, wrlen);
1069     req->op_to_immdlen = cpu_to_be32(
1070         FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1071         FW_WR_COMPL_F |
1072         FW_WR_IMMDLEN_V(mpalen));
1073     req->flowid_len16 = cpu_to_be32(
1074         FW_WR_FLOWID_V(ep->hwtid) |
1075         FW_WR_LEN16_V(wrlen >> 4));
1076     req->plen = cpu_to_be32(mpalen);
1077     req->tunnel_to_proxy = cpu_to_be32(
1078         FW_OFLD_TX_DATA_WR_FLUSH_F |
1079         FW_OFLD_TX_DATA_WR_SHOVE_F);
1080 
1081     mpa = (struct mpa_message *)(req + 1);
1082     memset(mpa, 0, sizeof(*mpa));
1083     memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1084     mpa->flags = MPA_REJECT;
1085     mpa->revision = ep->mpa_attr.version;
1086     mpa->private_data_size = htons(plen);
1087 
1088     if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1089         mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1090         mpa->private_data_size =
1091             htons(ntohs(mpa->private_data_size) +
1092                   sizeof(struct mpa_v2_conn_params));
1093         mpa_v2_params.ird = htons(((u16)ep->ird) |
1094                       (peer2peer ? MPA_V2_PEER2PEER_MODEL :
1095                        0));
1096         mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1097                       (p2p_type ==
1098                        FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1099                        MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1100                        FW_RI_INIT_P2PTYPE_READ_REQ ?
1101                        MPA_V2_RDMA_READ_RTR : 0) : 0));
1102         memcpy(mpa->private_data, &mpa_v2_params,
1103                sizeof(struct mpa_v2_conn_params));
1104 
1105         if (ep->plen)
1106             memcpy(mpa->private_data +
1107                    sizeof(struct mpa_v2_conn_params), pdata, plen);
1108     } else
1109         if (plen)
1110             memcpy(mpa->private_data, pdata, plen);
1111 
1112     /*
1113      * Reference the mpa skb again.  This ensures the data area
1114      * will remain in memory until the hw acks the tx.
1115      * Function fw4_ack() will deref it.
1116      */
1117     skb_get(skb);
1118     set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1119     t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1120     ep->mpa_skb = skb;
1121     ep->snd_seq += mpalen;
1122     return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1123 }
1124 
1125 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1126 {
1127     int mpalen, wrlen;
1128     struct fw_ofld_tx_data_wr *req;
1129     struct mpa_message *mpa;
1130     struct sk_buff *skb;
1131     struct mpa_v2_conn_params mpa_v2_params;
1132 
1133     pr_debug("ep %p tid %u pd_len %d\n",
1134          ep, ep->hwtid, ep->plen);
1135 
1136     mpalen = sizeof(*mpa) + plen;
1137     if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1138         mpalen += sizeof(struct mpa_v2_conn_params);
1139     wrlen = roundup(mpalen + sizeof(*req), 16);
1140 
1141     skb = get_skb(NULL, wrlen, GFP_KERNEL);
1142     if (!skb) {
1143         pr_err("%s - cannot alloc skb!\n", __func__);
1144         return -ENOMEM;
1145     }
1146     set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1147 
1148     req = skb_put_zero(skb, wrlen);
1149     req->op_to_immdlen = cpu_to_be32(
1150         FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1151         FW_WR_COMPL_F |
1152         FW_WR_IMMDLEN_V(mpalen));
1153     req->flowid_len16 = cpu_to_be32(
1154         FW_WR_FLOWID_V(ep->hwtid) |
1155         FW_WR_LEN16_V(wrlen >> 4));
1156     req->plen = cpu_to_be32(mpalen);
1157     req->tunnel_to_proxy = cpu_to_be32(
1158         FW_OFLD_TX_DATA_WR_FLUSH_F |
1159         FW_OFLD_TX_DATA_WR_SHOVE_F);
1160 
1161     mpa = (struct mpa_message *)(req + 1);
1162     memset(mpa, 0, sizeof(*mpa));
1163     memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1164     mpa->flags = 0;
1165     if (ep->mpa_attr.crc_enabled)
1166         mpa->flags |= MPA_CRC;
1167     if (ep->mpa_attr.recv_marker_enabled)
1168         mpa->flags |= MPA_MARKERS;
1169     mpa->revision = ep->mpa_attr.version;
1170     mpa->private_data_size = htons(plen);
1171 
1172     if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1173         mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1174         mpa->private_data_size =
1175             htons(ntohs(mpa->private_data_size) +
1176                   sizeof(struct mpa_v2_conn_params));
1177         mpa_v2_params.ird = htons((u16)ep->ird);
1178         mpa_v2_params.ord = htons((u16)ep->ord);
1179         if (peer2peer && (ep->mpa_attr.p2p_type !=
1180                     FW_RI_INIT_P2PTYPE_DISABLED)) {
1181             mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1182 
1183             if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1184                 mpa_v2_params.ord |=
1185                     htons(MPA_V2_RDMA_WRITE_RTR);
1186             else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1187                 mpa_v2_params.ord |=
1188                     htons(MPA_V2_RDMA_READ_RTR);
1189         }
1190 
1191         memcpy(mpa->private_data, &mpa_v2_params,
1192                sizeof(struct mpa_v2_conn_params));
1193 
1194         if (ep->plen)
1195             memcpy(mpa->private_data +
1196                    sizeof(struct mpa_v2_conn_params), pdata, plen);
1197     } else
1198         if (plen)
1199             memcpy(mpa->private_data, pdata, plen);
1200 
1201     /*
1202      * Reference the mpa skb.  This ensures the data area
1203      * will remain in memory until the hw acks the tx.
1204      * Function fw4_ack() will deref it.
1205      */
1206     skb_get(skb);
1207     t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1208     ep->mpa_skb = skb;
1209     __state_set(&ep->com, MPA_REP_SENT);
1210     ep->snd_seq += mpalen;
1211     return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1212 }
1213 
1214 static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1215 {
1216     struct c4iw_ep *ep;
1217     struct cpl_act_establish *req = cplhdr(skb);
1218     unsigned short tcp_opt = ntohs(req->tcp_opt);
1219     unsigned int tid = GET_TID(req);
1220     unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
1221     struct tid_info *t = dev->rdev.lldi.tids;
1222     int ret;
1223 
1224     ep = lookup_atid(t, atid);
1225 
1226     pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid,
1227          be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
1228 
1229     mutex_lock(&ep->com.mutex);
1230     dst_confirm(ep->dst);
1231 
1232     /* setup the hwtid for this connection */
1233     ep->hwtid = tid;
1234     cxgb4_insert_tid(t, ep, tid, ep->com.local_addr.ss_family);
1235     insert_ep_tid(ep);
1236 
1237     ep->snd_seq = be32_to_cpu(req->snd_isn);
1238     ep->rcv_seq = be32_to_cpu(req->rcv_isn);
1239     ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
1240 
1241     set_emss(ep, tcp_opt);
1242 
1243     /* dealloc the atid */
1244     xa_erase_irq(&ep->com.dev->atids, atid);
1245     cxgb4_free_atid(t, atid);
1246     set_bit(ACT_ESTAB, &ep->com.history);
1247 
1248     /* start MPA negotiation */
1249     ret = send_flowc(ep);
1250     if (ret)
1251         goto err;
1252     if (ep->retry_with_mpa_v1)
1253         ret = send_mpa_req(ep, skb, 1);
1254     else
1255         ret = send_mpa_req(ep, skb, mpa_rev);
1256     if (ret)
1257         goto err;
1258     mutex_unlock(&ep->com.mutex);
1259     return 0;
1260 err:
1261     mutex_unlock(&ep->com.mutex);
1262     connect_reply_upcall(ep, -ENOMEM);
1263     c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
1264     return 0;
1265 }
1266 
1267 static void close_complete_upcall(struct c4iw_ep *ep, int status)
1268 {
1269     struct iw_cm_event event;
1270 
1271     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1272     memset(&event, 0, sizeof(event));
1273     event.event = IW_CM_EVENT_CLOSE;
1274     event.status = status;
1275     if (ep->com.cm_id) {
1276         pr_debug("close complete delivered ep %p cm_id %p tid %u\n",
1277              ep, ep->com.cm_id, ep->hwtid);
1278         ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1279         deref_cm_id(&ep->com);
1280         set_bit(CLOSE_UPCALL, &ep->com.history);
1281     }
1282 }
1283 
1284 static void peer_close_upcall(struct c4iw_ep *ep)
1285 {
1286     struct iw_cm_event event;
1287 
1288     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1289     memset(&event, 0, sizeof(event));
1290     event.event = IW_CM_EVENT_DISCONNECT;
1291     if (ep->com.cm_id) {
1292         pr_debug("peer close delivered ep %p cm_id %p tid %u\n",
1293              ep, ep->com.cm_id, ep->hwtid);
1294         ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1295         set_bit(DISCONN_UPCALL, &ep->com.history);
1296     }
1297 }
1298 
1299 static void peer_abort_upcall(struct c4iw_ep *ep)
1300 {
1301     struct iw_cm_event event;
1302 
1303     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1304     memset(&event, 0, sizeof(event));
1305     event.event = IW_CM_EVENT_CLOSE;
1306     event.status = -ECONNRESET;
1307     if (ep->com.cm_id) {
1308         pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep,
1309              ep->com.cm_id, ep->hwtid);
1310         ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1311         deref_cm_id(&ep->com);
1312         set_bit(ABORT_UPCALL, &ep->com.history);
1313     }
1314 }
1315 
1316 static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1317 {
1318     struct iw_cm_event event;
1319 
1320     pr_debug("ep %p tid %u status %d\n",
1321          ep, ep->hwtid, status);
1322     memset(&event, 0, sizeof(event));
1323     event.event = IW_CM_EVENT_CONNECT_REPLY;
1324     event.status = status;
1325     memcpy(&event.local_addr, &ep->com.local_addr,
1326            sizeof(ep->com.local_addr));
1327     memcpy(&event.remote_addr, &ep->com.remote_addr,
1328            sizeof(ep->com.remote_addr));
1329 
1330     if ((status == 0) || (status == -ECONNREFUSED)) {
1331         if (!ep->tried_with_mpa_v1) {
1332             /* this means MPA_v2 is used */
1333             event.ord = ep->ird;
1334             event.ird = ep->ord;
1335             event.private_data_len = ep->plen -
1336                 sizeof(struct mpa_v2_conn_params);
1337             event.private_data = ep->mpa_pkt +
1338                 sizeof(struct mpa_message) +
1339                 sizeof(struct mpa_v2_conn_params);
1340         } else {
1341             /* this means MPA_v1 is used */
1342             event.ord = cur_max_read_depth(ep->com.dev);
1343             event.ird = cur_max_read_depth(ep->com.dev);
1344             event.private_data_len = ep->plen;
1345             event.private_data = ep->mpa_pkt +
1346                 sizeof(struct mpa_message);
1347         }
1348     }
1349 
1350     pr_debug("ep %p tid %u status %d\n", ep,
1351          ep->hwtid, status);
1352     set_bit(CONN_RPL_UPCALL, &ep->com.history);
1353     ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1354 
1355     if (status < 0)
1356         deref_cm_id(&ep->com);
1357 }
1358 
1359 static int connect_request_upcall(struct c4iw_ep *ep)
1360 {
1361     struct iw_cm_event event;
1362     int ret;
1363 
1364     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1365     memset(&event, 0, sizeof(event));
1366     event.event = IW_CM_EVENT_CONNECT_REQUEST;
1367     memcpy(&event.local_addr, &ep->com.local_addr,
1368            sizeof(ep->com.local_addr));
1369     memcpy(&event.remote_addr, &ep->com.remote_addr,
1370            sizeof(ep->com.remote_addr));
1371     event.provider_data = ep;
1372     if (!ep->tried_with_mpa_v1) {
1373         /* this means MPA_v2 is used */
1374         event.ord = ep->ord;
1375         event.ird = ep->ird;
1376         event.private_data_len = ep->plen -
1377             sizeof(struct mpa_v2_conn_params);
1378         event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1379             sizeof(struct mpa_v2_conn_params);
1380     } else {
1381         /* this means MPA_v1 is used. Send max supported */
1382         event.ord = cur_max_read_depth(ep->com.dev);
1383         event.ird = cur_max_read_depth(ep->com.dev);
1384         event.private_data_len = ep->plen;
1385         event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1386     }
1387     c4iw_get_ep(&ep->com);
1388     ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1389                               &event);
1390     if (ret)
1391         c4iw_put_ep(&ep->com);
1392     set_bit(CONNREQ_UPCALL, &ep->com.history);
1393     c4iw_put_ep(&ep->parent_ep->com);
1394     return ret;
1395 }
1396 
1397 static void established_upcall(struct c4iw_ep *ep)
1398 {
1399     struct iw_cm_event event;
1400 
1401     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1402     memset(&event, 0, sizeof(event));
1403     event.event = IW_CM_EVENT_ESTABLISHED;
1404     event.ird = ep->ord;
1405     event.ord = ep->ird;
1406     if (ep->com.cm_id) {
1407         pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1408         ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1409         set_bit(ESTAB_UPCALL, &ep->com.history);
1410     }
1411 }
1412 
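/*
 * Return RX credits to the hardware via a CPL_RX_DATA_ACK and request the
 * configured delayed-ack mode.
 */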
1413 static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
1414 {
1415     struct sk_buff *skb;
1416     u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
1417     u32 credit_dack;
1418 
1419     pr_debug("ep %p tid %u credits %u\n",
1420          ep, ep->hwtid, credits);
1421     skb = get_skb(NULL, wrlen, GFP_KERNEL);
1422     if (!skb) {
1423         pr_err("update_rx_credits - cannot alloc skb!\n");
1424         return 0;
1425     }
1426 
1427     /*
1428      * If we couldn't specify the entire rcv window at connection setup
1429      * due to the limit in the number of bits in the RCV_BUFSIZ field,
1430      * then add the overage in to the credits returned.
1431      */
1432     if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
1433         credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
1434 
1435     credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
1436               RX_DACK_MODE_V(dack_mode);
1437 
1438     cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
1439                 credit_dack);
1440 
1441     c4iw_ofld_send(&ep->com.dev->rdev, skb);
1442     return credits;
1443 }
1444 
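/*
 * When RELAXED_IRD_NEGOTIATION is set, process_mpa_reply() adjusts the
 * local IRD/ORD to the responder's advertised values instead of failing
 * the negotiation when they do not match.
 */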
1445 #define RELAXED_IRD_NEGOTIATION 1
1446 
1447 /*
1448  * process_mpa_reply - process streaming mode MPA reply
1449  *
1450  * Returns:
1451  *
1452  * 0 upon success indicating a connect request was delivered to the ULP
1453  * or the mpa request is incomplete but valid so far.
1454  *
1455  * 1 if a failure requires the caller to close the connection.
1456  *
1457  * 2 if a failure requires the caller to abort the connection.
1458  */
1459 static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1460 {
1461     struct mpa_message *mpa;
1462     struct mpa_v2_conn_params *mpa_v2_params;
1463     u16 plen;
1464     u16 resp_ird, resp_ord;
1465     u8 rtr_mismatch = 0, insuff_ird = 0;
1466     struct c4iw_qp_attributes attrs;
1467     enum c4iw_qp_attr_mask mask;
1468     int err;
1469     int disconnect = 0;
1470 
1471     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1472 
1473     /*
1474      * If we get more than the supported amount of private data
1475      * then we must fail this connection.
1476      */
1477     if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1478         err = -EINVAL;
1479         goto err_stop_timer;
1480     }
1481 
1482     /*
1483      * copy the new data into our accumulation buffer.
1484      */
1485     skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1486                   skb->len);
1487     ep->mpa_pkt_len += skb->len;
1488 
1489     /*
1490      * if we don't even have the mpa message, then bail.
1491      */
1492     if (ep->mpa_pkt_len < sizeof(*mpa))
1493         return 0;
1494     mpa = (struct mpa_message *) ep->mpa_pkt;
1495 
1496     /* Validate MPA header. */
1497     if (mpa->revision > mpa_rev) {
1498         pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1499                __func__, mpa_rev, mpa->revision);
1500         err = -EPROTO;
1501         goto err_stop_timer;
1502     }
1503     if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1504         err = -EPROTO;
1505         goto err_stop_timer;
1506     }
1507 
1508     plen = ntohs(mpa->private_data_size);
1509 
1510     /*
1511      * Fail if there's too much private data.
1512      */
1513     if (plen > MPA_MAX_PRIVATE_DATA) {
1514         err = -EPROTO;
1515         goto err_stop_timer;
1516     }
1517 
1518     /*
1519      * Fail if the accumulated data exceeds the MPA header plus plen.
1520      */
1521     if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1522         err = -EPROTO;
1523         goto err_stop_timer;
1524     }
1525 
1526     ep->plen = (u8) plen;
1527 
1528     /*
1529      * If we don't have all the pdata yet, then bail.
1530      * We'll continue process when more data arrives.
1531      */
1532     if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1533         return 0;
1534 
1535     if (mpa->flags & MPA_REJECT) {
1536         err = -ECONNREFUSED;
1537         goto err_stop_timer;
1538     }
1539 
1540     /*
1541      * Stop mpa timer.  If it expired, then
1542      * we ignore the MPA reply.  process_timeout()
1543      * will abort the connection.
1544      */
1545     if (stop_ep_timer(ep))
1546         return 0;
1547 
1548     /*
1549      * If we get here we have accumulated the entire mpa
1550      * start reply message including private data. And
1551      * the MPA header is valid.
1552      */
1553     __state_set(&ep->com, FPDU_MODE);
1554     ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1555     ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1556     ep->mpa_attr.version = mpa->revision;
1557     ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1558 
1559     if (mpa->revision == 2) {
1560         ep->mpa_attr.enhanced_rdma_conn =
1561             mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1562         if (ep->mpa_attr.enhanced_rdma_conn) {
1563             mpa_v2_params = (struct mpa_v2_conn_params *)
1564                 (ep->mpa_pkt + sizeof(*mpa));
1565             resp_ird = ntohs(mpa_v2_params->ird) &
1566                 MPA_V2_IRD_ORD_MASK;
1567             resp_ord = ntohs(mpa_v2_params->ord) &
1568                 MPA_V2_IRD_ORD_MASK;
1569             pr_debug("responder ird %u ord %u ep ird %u ord %u\n",
1570                  resp_ird, resp_ord, ep->ird, ep->ord);
1571 
1572             /*
1573              * This is a double-check. Ideally, below checks are
1574              * not required since ird/ord stuff has been taken
1575              * care of in c4iw_accept_cr
1576              */
1577             if (ep->ird < resp_ord) {
1578                 if (RELAXED_IRD_NEGOTIATION && resp_ord <=
1579                     ep->com.dev->rdev.lldi.max_ordird_qp)
1580                     ep->ird = resp_ord;
1581                 else
1582                     insuff_ird = 1;
1583             } else if (ep->ird > resp_ord) {
1584                 ep->ird = resp_ord;
1585             }
1586             if (ep->ord > resp_ird) {
1587                 if (RELAXED_IRD_NEGOTIATION)
1588                     ep->ord = resp_ird;
1589                 else
1590                     insuff_ird = 1;
1591             }
1592             if (insuff_ird) {
1593                 err = -ENOMEM;
1594                 ep->ird = resp_ord;
1595                 ep->ord = resp_ird;
1596             }
1597 
1598             if (ntohs(mpa_v2_params->ird) &
1599                     MPA_V2_PEER2PEER_MODEL) {
1600                 if (ntohs(mpa_v2_params->ord) &
1601                         MPA_V2_RDMA_WRITE_RTR)
1602                     ep->mpa_attr.p2p_type =
1603                         FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1604                 else if (ntohs(mpa_v2_params->ord) &
1605                         MPA_V2_RDMA_READ_RTR)
1606                     ep->mpa_attr.p2p_type =
1607                         FW_RI_INIT_P2PTYPE_READ_REQ;
1608             }
1609         }
1610     } else if (mpa->revision == 1)
1611         if (peer2peer)
1612             ep->mpa_attr.p2p_type = p2p_type;
1613 
1614     pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n",
1615          ep->mpa_attr.crc_enabled,
1616          ep->mpa_attr.recv_marker_enabled,
1617          ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1618          ep->mpa_attr.p2p_type, p2p_type);
1619 
1620     /*
1621      * If the responder's RTR does not match the initiator's, assign
1622      * FW_RI_INIT_P2PTYPE_DISABLED in the mpa attributes so that no RTR is
1623      * generated when moving the QP to RTS state.
1624      * A TERM message will be sent after the QP has moved to RTS state.
1625      */
1626     if ((ep->mpa_attr.version == 2) && peer2peer &&
1627             (ep->mpa_attr.p2p_type != p2p_type)) {
1628         ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1629         rtr_mismatch = 1;
1630     }
1631 
1632     attrs.mpa_attr = ep->mpa_attr;
1633     attrs.max_ird = ep->ird;
1634     attrs.max_ord = ep->ord;
1635     attrs.llp_stream_handle = ep;
1636     attrs.next_state = C4IW_QP_STATE_RTS;
1637 
1638     mask = C4IW_QP_ATTR_NEXT_STATE |
1639         C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1640         C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1641 
1642     /* bind QP and TID with INIT_WR */
1643     err = c4iw_modify_qp(ep->com.qp->rhp,
1644                  ep->com.qp, mask, &attrs, 1);
1645     if (err)
1646         goto err;
1647 
1648     /*
1649      * If the responder's RTR requirement did not match what the initiator
1650      * supports, generate a TERM message.
1651      */
1652     if (rtr_mismatch) {
1653         pr_err("%s: RTR mismatch, sending TERM\n", __func__);
1654         attrs.layer_etype = LAYER_MPA | DDP_LLP;
1655         attrs.ecode = MPA_NOMATCH_RTR;
1656         attrs.next_state = C4IW_QP_STATE_TERMINATE;
1657         attrs.send_term = 1;
1658         err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1659                 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1660         err = -ENOMEM;
1661         disconnect = 1;
1662         goto out;
1663     }
1664 
1665     /*
1666      * Generate a TERM if the initiator IRD is not sufficient for the
1667      * responder-provided ORD. Currently, we behave the same way even when
1668      * the responder-provided IRD is not sufficient for the
1669      * initiator ORD.
1670      */
1671     if (insuff_ird) {
1672         pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
1673         attrs.layer_etype = LAYER_MPA | DDP_LLP;
1674         attrs.ecode = MPA_INSUFF_IRD;
1675         attrs.next_state = C4IW_QP_STATE_TERMINATE;
1676         attrs.send_term = 1;
1677         err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1678                 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1679         err = -ENOMEM;
1680         disconnect = 1;
1681         goto out;
1682     }
1683     goto out;
1684 err_stop_timer:
1685     stop_ep_timer(ep);
1686 err:
1687     disconnect = 2;
1688 out:
1689     connect_reply_upcall(ep, err);
1690     return disconnect;
1691 }
1692 
1693 /*
1694  * process_mpa_request - process streaming mode MPA request
1695  *
1696  * Returns:
1697  *
1698  * 0 upon success indicating a connect request was delivered to the ULP
1699  * or the mpa request is incomplete but valid so far.
1700  *
1701  * 1 if a failure requires the caller to close the connection.
1702  *
1703  * 2 if a failure requires the caller to abort the connection.
1704  */
1705 static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1706 {
1707     struct mpa_message *mpa;
1708     struct mpa_v2_conn_params *mpa_v2_params;
1709     u16 plen;
1710 
1711     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1712 
1713     /*
1714      * If we get more than the supported amount of private data
1715      * then we must fail this connection.
1716      */
1717     if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
1718         goto err_stop_timer;
1719 
1720     pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1721 
1722     /*
1723      * Copy the new data into our accumulation buffer.
1724      */
1725     skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1726                   skb->len);
1727     ep->mpa_pkt_len += skb->len;
1728 
1729     /*
1730      * If we don't even have the mpa message, then bail.
1731      * We'll continue processing when more data arrives.
1732      */
1733     if (ep->mpa_pkt_len < sizeof(*mpa))
1734         return 0;
1735 
1736     pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1737     mpa = (struct mpa_message *) ep->mpa_pkt;
1738 
1739     /*
1740      * Validate MPA Header.
1741      */
1742     if (mpa->revision > mpa_rev) {
1743         pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1744                __func__, mpa_rev, mpa->revision);
1745         goto err_stop_timer;
1746     }
1747 
1748     if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
1749         goto err_stop_timer;
1750 
1751     plen = ntohs(mpa->private_data_size);
1752 
1753     /*
1754      * Fail if there's too much private data.
1755      */
1756     if (plen > MPA_MAX_PRIVATE_DATA)
1757         goto err_stop_timer;
1758 
1759     /*
1760      * Fail if plen does not account for all of the received data.
1761      */
1762     if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
1763         goto err_stop_timer;
1764     ep->plen = (u8) plen;
1765 
1766     /*
1767      * If we don't have all the pdata yet, then bail.
1768      */
1769     if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1770         return 0;
1771 
1772     /*
1773      * If we get here we have accumulated the entire mpa
1774      * start request message including private data.
1775      */
1776     ep->mpa_attr.initiator = 0;
1777     ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1778     ep->mpa_attr.recv_marker_enabled = markers_enabled;
1779     ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1780     ep->mpa_attr.version = mpa->revision;
1781     if (mpa->revision == 1)
1782         ep->tried_with_mpa_v1 = 1;
1783     ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1784 
1785     if (mpa->revision == 2) {
1786         ep->mpa_attr.enhanced_rdma_conn =
1787             mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1788         if (ep->mpa_attr.enhanced_rdma_conn) {
1789             mpa_v2_params = (struct mpa_v2_conn_params *)
1790                 (ep->mpa_pkt + sizeof(*mpa));
1791             ep->ird = ntohs(mpa_v2_params->ird) &
1792                 MPA_V2_IRD_ORD_MASK;
1793             ep->ird = min_t(u32, ep->ird,
1794                     cur_max_read_depth(ep->com.dev));
1795             ep->ord = ntohs(mpa_v2_params->ord) &
1796                 MPA_V2_IRD_ORD_MASK;
1797             ep->ord = min_t(u32, ep->ord,
1798                     cur_max_read_depth(ep->com.dev));
1799             pr_debug("initiator ird %u ord %u\n",
1800                  ep->ird, ep->ord);
1801             if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
1802                 if (peer2peer) {
1803                     if (ntohs(mpa_v2_params->ord) &
1804                             MPA_V2_RDMA_WRITE_RTR)
1805                         ep->mpa_attr.p2p_type =
1806                         FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1807                     else if (ntohs(mpa_v2_params->ord) &
1808                             MPA_V2_RDMA_READ_RTR)
1809                         ep->mpa_attr.p2p_type =
1810                         FW_RI_INIT_P2PTYPE_READ_REQ;
1811                 }
1812         }
1813     } else if (mpa->revision == 1)
1814         if (peer2peer)
1815             ep->mpa_attr.p2p_type = p2p_type;
1816 
1817     pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n",
1818          ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1819          ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1820          ep->mpa_attr.p2p_type);
1821 
1822     __state_set(&ep->com, MPA_REQ_RCVD);
1823 
1824     /* drive upcall */
1825     mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING);
1826     if (ep->parent_ep->com.state != DEAD) {
1827         if (connect_request_upcall(ep))
1828             goto err_unlock_parent;
1829     } else {
1830         goto err_unlock_parent;
1831     }
1832     mutex_unlock(&ep->parent_ep->com.mutex);
1833     return 0;
1834 
1835 err_unlock_parent:
1836     mutex_unlock(&ep->parent_ep->com.mutex);
1837     goto err_out;
1838 err_stop_timer:
1839     (void)stop_ep_timer(ep);
1840 err_out:
1841     return 2;
1842 }
1843 
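/*
 * rx_data - handler for CPL_RX_DATA (streaming-mode ingress data).
 *
 * Trims the skb to the TCP payload and, depending on the endpoint state,
 * feeds it to process_mpa_reply() (active side) or process_mpa_request()
 * (passive side).  Streaming data received in FPDU_MODE is unexpected and
 * moves the QP to TERMINATE.  Disconnects or aborts if MPA processing
 * requests it.
 */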
1844 static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1845 {
1846     struct c4iw_ep *ep;
1847     struct cpl_rx_data *hdr = cplhdr(skb);
1848     unsigned int dlen = ntohs(hdr->len);
1849     unsigned int tid = GET_TID(hdr);
1850     __u8 status = hdr->status;
1851     int disconnect = 0;
1852 
1853     ep = get_ep_from_tid(dev, tid);
1854     if (!ep)
1855         return 0;
1856     pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen);
1857     skb_pull(skb, sizeof(*hdr));
1858     skb_trim(skb, dlen);
1859     mutex_lock(&ep->com.mutex);
1860 
1861     switch (ep->com.state) {
1862     case MPA_REQ_SENT:
1863         update_rx_credits(ep, dlen);
1864         ep->rcv_seq += dlen;
1865         disconnect = process_mpa_reply(ep, skb);
1866         break;
1867     case MPA_REQ_WAIT:
1868         update_rx_credits(ep, dlen);
1869         ep->rcv_seq += dlen;
1870         disconnect = process_mpa_request(ep, skb);
1871         break;
1872     case FPDU_MODE: {
1873         struct c4iw_qp_attributes attrs;
1874 
1875         update_rx_credits(ep, dlen);
1876         if (status)
1877             pr_err("%s Unexpected streaming data." \
1878                    " qpid %u ep %p state %d tid %u status %d\n",
1879                    __func__, ep->com.qp->wq.sq.qid, ep,
1880                    ep->com.state, ep->hwtid, status);
1881         attrs.next_state = C4IW_QP_STATE_TERMINATE;
1882         c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1883                    C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1884         disconnect = 1;
1885         break;
1886     }
1887     default:
1888         break;
1889     }
1890     mutex_unlock(&ep->com.mutex);
1891     if (disconnect)
1892         c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
1893     c4iw_put_ep(&ep->com);
1894     return 0;
1895 }
1896 
1897 static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
1898 {
1899     enum chip_type adapter_type;
1900 
1901     adapter_type = ep->com.dev->rdev.lldi.adapter_type;
1902 
1903     /*
1904      * If this TCB had a srq buffer cached, then we must complete
1905      * it. For user mode, that means saving the srqidx in the
1906      * user/kernel status page for this qp.  For kernel mode, just
1907      * synthesize the CQE now.
1908      */
1909     if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
1910         if (ep->com.qp->ibqp.uobject)
1911             t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
1912         else
1913             c4iw_flush_srqidx(ep->com.qp, srqidx);
1914     }
1915 }
1916 
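/*
 * abort_rpl - handler for the hardware reply (struct cpl_abort_rpl_rss6)
 * to an abort request we sent.
 *
 * Completes any SRQ buffer cached in the TCB, and if the endpoint is in
 * ABORTING, wakes the waiter, marks it DEAD and releases its resources.
 */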
1917 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1918 {
1919     u32 srqidx;
1920     struct c4iw_ep *ep;
1921     struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
1922     int release = 0;
1923     unsigned int tid = GET_TID(rpl);
1924 
1925     ep = get_ep_from_tid(dev, tid);
1926     if (!ep) {
1927         pr_warn("Abort rpl to freed endpoint\n");
1928         return 0;
1929     }
1930 
1931     if (ep->com.qp && ep->com.qp->srq) {
1932         srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
1933         complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
1934     }
1935 
1936     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1937     mutex_lock(&ep->com.mutex);
1938     switch (ep->com.state) {
1939     case ABORTING:
1940         c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
1941         __state_set(&ep->com, DEAD);
1942         release = 1;
1943         break;
1944     default:
1945         pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
1946         break;
1947     }
1948     mutex_unlock(&ep->com.mutex);
1949 
1950     if (release) {
1951         close_complete_upcall(ep, -ECONNRESET);
1952         release_ep_resources(ep);
1953     }
1954     c4iw_put_ep(&ep->com);
1955     return 0;
1956 }
1957 
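/*
 * send_fw_act_open_req - retry an active open via a FW_OFLD_CONNECTION_WR.
 *
 * Builds a firmware offloaded-connection work request carrying the 4-tuple,
 * the atid and the TCB options (opt0/opt2), and hands it to the L2T send
 * path.  Used by act_open_rpl() when the regular active open fails with
 * CPL_ERR_TCAM_FULL.
 */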
1958 static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1959 {
1960     struct sk_buff *skb;
1961     struct fw_ofld_connection_wr *req;
1962     unsigned int mtu_idx;
1963     u32 wscale;
1964     struct sockaddr_in *sin;
1965     int win;
1966 
1967     skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1968     req = __skb_put_zero(skb, sizeof(*req));
1969     req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
1970     req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
1971     req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
1972                      ep->com.dev->rdev.lldi.ports[0],
1973                      ep->l2t));
1974     sin = (struct sockaddr_in *)&ep->com.local_addr;
1975     req->le.lport = sin->sin_port;
1976     req->le.u.ipv4.lip = sin->sin_addr.s_addr;
1977     sin = (struct sockaddr_in *)&ep->com.remote_addr;
1978     req->le.pport = sin->sin_port;
1979     req->le.u.ipv4.pip = sin->sin_addr.s_addr;
1980     req->tcb.t_state_to_astid =
1981             htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) |
1982             FW_OFLD_CONNECTION_WR_ASTID_V(atid));
1983     req->tcb.cplrxdataack_cplpassacceptrpl =
1984             htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
1985     req->tcb.tx_max = (__force __be32) jiffies;
1986     req->tcb.rcv_adv = htons(1);
1987     cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
1988               enable_tcp_timestamps,
1989               (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
1990     wscale = cxgb_compute_wscale(rcv_win);
1991 
1992     /*
1993      * Specify the largest window that will fit in opt0. The
1994      * remainder will be specified in the rx_data_ack.
1995      */
1996     win = ep->rcv_win >> 10;
1997     if (win > RCV_BUFSIZ_M)
1998         win = RCV_BUFSIZ_M;
1999 
2000     req->tcb.opt0 = (__force __be64) (TCAM_BYPASS_F |
2001         (nocong ? NO_CONG_F : 0) |
2002         KEEP_ALIVE_F |
2003         DELACK_F |
2004         WND_SCALE_V(wscale) |
2005         MSS_IDX_V(mtu_idx) |
2006         L2T_IDX_V(ep->l2t->idx) |
2007         TX_CHAN_V(ep->tx_chan) |
2008         SMAC_SEL_V(ep->smac_idx) |
2009         DSCP_V(ep->tos >> 2) |
2010         ULP_MODE_V(ULP_MODE_TCPDDP) |
2011         RCV_BUFSIZ_V(win));
2012     req->tcb.opt2 = (__force __be32) (PACE_V(1) |
2013         TX_QUEUE_V(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
2014         RX_CHANNEL_V(0) |
2015         CCTRL_ECN_V(enable_ecn) |
2016         RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid));
2017     if (enable_tcp_timestamps)
2018         req->tcb.opt2 |= (__force __be32)TSTAMPS_EN_F;
2019     if (enable_tcp_sack)
2020         req->tcb.opt2 |= (__force __be32)SACK_EN_F;
2021     if (wscale && enable_tcp_window_scaling)
2022         req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F;
2023     req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0);
2024     req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
2025     set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
2026     set_bit(ACT_OFLD_CONN, &ep->com.history);
2027     return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2028 }
2029 
2030 /*
2031  * Some of the error codes above implicitly indicate that there is no TID
2032  * allocated with the result of an ACT_OPEN.  We use this predicate to make
2033  * that explicit.
2034  */
2035 static inline int act_open_has_tid(int status)
2036 {
2037     return (status != CPL_ERR_TCAM_PARITY &&
2038         status != CPL_ERR_TCAM_MISS &&
2039         status != CPL_ERR_TCAM_FULL &&
2040         status != CPL_ERR_CONN_EXIST_SYNRECV &&
2041         status != CPL_ERR_CONN_EXIST);
2042 }
2043 
2044 static char *neg_adv_str(unsigned int status)
2045 {
2046     switch (status) {
2047     case CPL_ERR_RTX_NEG_ADVICE:
2048         return "Retransmit timeout";
2049     case CPL_ERR_PERSIST_NEG_ADVICE:
2050         return "Persist timeout";
2051     case CPL_ERR_KEEPALV_NEG_ADVICE:
2052         return "Keepalive timeout";
2053     default:
2054         return "Unknown";
2055     }
2056 }
2057 
2058 static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
2059 {
2060     ep->snd_win = snd_win;
2061     ep->rcv_win = rcv_win;
2062     pr_debug("snd_win %d rcv_win %d\n",
2063          ep->snd_win, ep->rcv_win);
2064 }
2065 
2066 #define ACT_OPEN_RETRY_COUNT 2
2067 
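/*
 * import_ep - resolve the L2 path for a connection and cache the per-port
 * parameters on the endpoint.
 *
 * Looks up the neighbour for the peer address, grabs an L2T entry and fills
 * in the mtu, tx channel, SMAC index, txq/ctrlq/rss queue ids and TCP
 * windows.  Loopback destinations are resolved against the local netdev
 * that owns the peer address.
 */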
2068 static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
2069              struct dst_entry *dst, struct c4iw_dev *cdev,
2070              bool clear_mpa_v1, enum chip_type adapter_type, u8 tos)
2071 {
2072     struct neighbour *n;
2073     int err, step;
2074     struct net_device *pdev;
2075 
2076     n = dst_neigh_lookup(dst, peer_ip);
2077     if (!n)
2078         return -ENODEV;
2079 
2080     rcu_read_lock();
2081     err = -ENOMEM;
2082     if (n->dev->flags & IFF_LOOPBACK) {
2083         if (iptype == 4)
2084             pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
2085         else if (IS_ENABLED(CONFIG_IPV6))
2086             for_each_netdev(&init_net, pdev) {
2087                 if (ipv6_chk_addr(&init_net,
2088                           (struct in6_addr *)peer_ip,
2089                           pdev, 1))
2090                     break;
2091             }
2092         else
2093             pdev = NULL;
2094 
2095         if (!pdev) {
2096             err = -ENODEV;
2097             goto out;
2098         }
2099         ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2100                     n, pdev, rt_tos2priority(tos));
2101         if (!ep->l2t) {
2102             dev_put(pdev);
2103             goto out;
2104         }
2105         ep->mtu = pdev->mtu;
2106         ep->tx_chan = cxgb4_port_chan(pdev);
2107         ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2108         step = cdev->rdev.lldi.ntxq /
2109             cdev->rdev.lldi.nchan;
2110         ep->txq_idx = cxgb4_port_idx(pdev) * step;
2111         step = cdev->rdev.lldi.nrxq /
2112             cdev->rdev.lldi.nchan;
2113         ep->ctrlq_idx = cxgb4_port_idx(pdev);
2114         ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2115             cxgb4_port_idx(pdev) * step];
2116         set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2117         dev_put(pdev);
2118     } else {
2119         pdev = get_real_dev(n->dev);
2120         ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2121                     n, pdev, rt_tos2priority(tos));
2122         if (!ep->l2t)
2123             goto out;
2124         ep->mtu = dst_mtu(dst);
2125         ep->tx_chan = cxgb4_port_chan(pdev);
2126         ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2127         step = cdev->rdev.lldi.ntxq /
2128             cdev->rdev.lldi.nchan;
2129         ep->txq_idx = cxgb4_port_idx(pdev) * step;
2130         ep->ctrlq_idx = cxgb4_port_idx(pdev);
2131         step = cdev->rdev.lldi.nrxq /
2132             cdev->rdev.lldi.nchan;
2133         ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2134             cxgb4_port_idx(pdev) * step];
2135         set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2136 
2137         if (clear_mpa_v1) {
2138             ep->retry_with_mpa_v1 = 0;
2139             ep->tried_with_mpa_v1 = 0;
2140         }
2141     }
2142     err = 0;
2143 out:
2144     rcu_read_unlock();
2145 
2146     neigh_release(n);
2147 
2148     return err;
2149 }
2150 
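/*
 * c4iw_reconnect - re-drive an active connection on an existing endpoint.
 *
 * Used when the first active open must be retried, e.g. after
 * CPL_ERR_CONN_EXIST or when falling back from MPA v2 to MPA v1.  Allocates
 * a fresh atid, finds a route, re-imports the L2 state and sends a new
 * connect request; on failure the original connect is failed upward.
 */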
2151 static int c4iw_reconnect(struct c4iw_ep *ep)
2152 {
2153     int err = 0;
2154     int size = 0;
2155     struct sockaddr_in *laddr = (struct sockaddr_in *)
2156                     &ep->com.cm_id->m_local_addr;
2157     struct sockaddr_in *raddr = (struct sockaddr_in *)
2158                     &ep->com.cm_id->m_remote_addr;
2159     struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
2160                       &ep->com.cm_id->m_local_addr;
2161     struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
2162                       &ep->com.cm_id->m_remote_addr;
2163     int iptype;
2164     __u8 *ra;
2165 
2166     pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id);
2167     c4iw_init_wr_wait(ep->com.wr_waitp);
2168 
2169     /* When the MPA revision differs between the two nodes, the node with
2170      * MPA_rev=2 retries the connection with MPA_rev 1 on the same EP through
2171      * c4iw_reconnect(), where the same EP is assigned a new tid for the
2172      * new connection attempt. Because the same EP pointer is reused for the
2173      * reconnect, some skbs were already consumed by the previous
2174      * c4iw_connect(), which leaves the EP with too few skbs for the
2175      * reconnect and eventually causes a crash due to an empty skb list
2176      * during peer_abort(). Re-allocate the skbs that were already used.
2177      */
2178     size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list));
2179     if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
2180         err = -ENOMEM;
2181         goto fail1;
2182     }
2183 
2184     /*
2185      * Allocate an active TID to initiate a TCP connection.
2186      */
2187     ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
2188     if (ep->atid == -1) {
2189         pr_err("%s - cannot alloc atid\n", __func__);
2190         err = -ENOMEM;
2191         goto fail2;
2192     }
2193     err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL);
2194     if (err)
2195         goto fail2a;
2196 
2197     /* find a route */
2198     if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
2199         ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
2200                       laddr->sin_addr.s_addr,
2201                       raddr->sin_addr.s_addr,
2202                       laddr->sin_port,
2203                       raddr->sin_port, ep->com.cm_id->tos);
2204         iptype = 4;
2205         ra = (__u8 *)&raddr->sin_addr;
2206     } else {
2207         ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
2208                        get_real_dev,
2209                        laddr6->sin6_addr.s6_addr,
2210                        raddr6->sin6_addr.s6_addr,
2211                        laddr6->sin6_port,
2212                        raddr6->sin6_port,
2213                        ep->com.cm_id->tos,
2214                        raddr6->sin6_scope_id);
2215         iptype = 6;
2216         ra = (__u8 *)&raddr6->sin6_addr;
2217     }
2218     if (!ep->dst) {
2219         pr_err("%s - cannot find route\n", __func__);
2220         err = -EHOSTUNREACH;
2221         goto fail3;
2222     }
2223     err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
2224             ep->com.dev->rdev.lldi.adapter_type,
2225             ep->com.cm_id->tos);
2226     if (err) {
2227         pr_err("%s - cannot alloc l2e\n", __func__);
2228         goto fail4;
2229     }
2230 
2231     pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
2232          ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
2233          ep->l2t->idx);
2234 
2235     state_set(&ep->com, CONNECTING);
2236     ep->tos = ep->com.cm_id->tos;
2237 
2238     /* send connect request to rnic */
2239     err = send_connect(ep);
2240     if (!err)
2241         goto out;
2242 
2243     cxgb4_l2t_release(ep->l2t);
2244 fail4:
2245     dst_release(ep->dst);
2246 fail3:
2247     xa_erase_irq(&ep->com.dev->atids, ep->atid);
2248 fail2a:
2249     cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
2250 fail2:
2251     /*
2252      * Remember to send a notification to the upper layer.
2253      * We got here because the upper layer is not aware that this is a
2254      * reconnect attempt and is therefore still waiting for the
2255      * response to its first connect request.
2256      */
2257     connect_reply_upcall(ep, -ECONNRESET);
2258 fail1:
2259     c4iw_put_ep(&ep->com);
2260 out:
2261     return err;
2262 }
2263 
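/*
 * act_open_rpl - handler for CPL_ACT_OPEN_RPL: result of an active open.
 *
 * Negative advice is only counted.  CPL_ERR_TCAM_FULL may be retried via
 * the firmware offload path, CPL_ERR_CONN_EXIST via c4iw_reconnect().  Any
 * other failure is reported to the ULP and the connection state is torn
 * down (atid, CLIP entry, tid, route and L2T entry).
 */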
2264 static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2265 {
2266     struct c4iw_ep *ep;
2267     struct cpl_act_open_rpl *rpl = cplhdr(skb);
2268     unsigned int atid = TID_TID_G(AOPEN_ATID_G(
2269                       ntohl(rpl->atid_status)));
2270     struct tid_info *t = dev->rdev.lldi.tids;
2271     int status = AOPEN_STATUS_G(ntohl(rpl->atid_status));
2272     struct sockaddr_in *la;
2273     struct sockaddr_in *ra;
2274     struct sockaddr_in6 *la6;
2275     struct sockaddr_in6 *ra6;
2276     int ret = 0;
2277 
2278     ep = lookup_atid(t, atid);
2279     la = (struct sockaddr_in *)&ep->com.local_addr;
2280     ra = (struct sockaddr_in *)&ep->com.remote_addr;
2281     la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
2282     ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
2283 
2284     pr_debug("ep %p atid %u status %u errno %d\n", ep, atid,
2285          status, status2errno(status));
2286 
2287     if (cxgb_is_neg_adv(status)) {
2288         pr_debug("Connection problems for atid %u status %u (%s)\n",
2289              atid, status, neg_adv_str(status));
2290         ep->stats.connect_neg_adv++;
2291         mutex_lock(&dev->rdev.stats.lock);
2292         dev->rdev.stats.neg_adv++;
2293         mutex_unlock(&dev->rdev.stats.lock);
2294         return 0;
2295     }
2296 
2297     set_bit(ACT_OPEN_RPL, &ep->com.history);
2298 
2299     /*
2300      * Log interesting failures.
2301      */
2302     switch (status) {
2303     case CPL_ERR_CONN_RESET:
2304     case CPL_ERR_CONN_TIMEDOUT:
2305         break;
2306     case CPL_ERR_TCAM_FULL:
2307         mutex_lock(&dev->rdev.stats.lock);
2308         dev->rdev.stats.tcam_full++;
2309         mutex_unlock(&dev->rdev.stats.lock);
2310         if (ep->com.local_addr.ss_family == AF_INET &&
2311             dev->rdev.lldi.enable_fw_ofld_conn) {
2312             ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G(
2313                            ntohl(rpl->atid_status))));
2314             if (ret)
2315                 goto fail;
2316             return 0;
2317         }
2318         break;
2319     case CPL_ERR_CONN_EXIST:
2320         if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
2321             set_bit(ACT_RETRY_INUSE, &ep->com.history);
2322             if (ep->com.remote_addr.ss_family == AF_INET6) {
2323                 struct sockaddr_in6 *sin6 =
2324                         (struct sockaddr_in6 *)
2325                         &ep->com.local_addr;
2326                 cxgb4_clip_release(
2327                         ep->com.dev->rdev.lldi.ports[0],
2328                         (const u32 *)
2329                         &sin6->sin6_addr.s6_addr, 1);
2330             }
2331             xa_erase_irq(&ep->com.dev->atids, atid);
2332             cxgb4_free_atid(t, atid);
2333             dst_release(ep->dst);
2334             cxgb4_l2t_release(ep->l2t);
2335             c4iw_reconnect(ep);
2336             return 0;
2337         }
2338         break;
2339     default:
2340         if (ep->com.local_addr.ss_family == AF_INET) {
2341             pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
2342                 atid, status, status2errno(status),
2343                 &la->sin_addr.s_addr, ntohs(la->sin_port),
2344                 &ra->sin_addr.s_addr, ntohs(ra->sin_port));
2345         } else {
2346             pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n",
2347                 atid, status, status2errno(status),
2348                 la6->sin6_addr.s6_addr, ntohs(la6->sin6_port),
2349                 ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port));
2350         }
2351         break;
2352     }
2353 
2354 fail:
2355     connect_reply_upcall(ep, status2errno(status));
2356     state_set(&ep->com, DEAD);
2357 
2358     if (ep->com.remote_addr.ss_family == AF_INET6) {
2359         struct sockaddr_in6 *sin6 =
2360             (struct sockaddr_in6 *)&ep->com.local_addr;
2361         cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
2362                    (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2363     }
2364     if (status && act_open_has_tid(status))
2365         cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl),
2366                  ep->com.local_addr.ss_family);
2367 
2368     xa_erase_irq(&ep->com.dev->atids, atid);
2369     cxgb4_free_atid(t, atid);
2370     dst_release(ep->dst);
2371     cxgb4_l2t_release(ep->l2t);
2372     c4iw_put_ep(&ep->com);
2373 
2374     return 0;
2375 }
2376 
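/*
 * pass_open_rpl - handler for CPL_PASS_OPEN_RPL: completion of a listen
 * (server) create request; wakes up the waiter with the returned status.
 */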
2377 static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2378 {
2379     struct cpl_pass_open_rpl *rpl = cplhdr(skb);
2380     unsigned int stid = GET_TID(rpl);
2381     struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2382 
2383     if (!ep) {
2384         pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2385         goto out;
2386     }
2387     pr_debug("ep %p status %d error %d\n", ep,
2388          rpl->status, status2errno(rpl->status));
2389     c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2390     c4iw_put_ep(&ep->com);
2391 out:
2392     return 0;
2393 }
2394 
2395 static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2396 {
2397     struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
2398     unsigned int stid = GET_TID(rpl);
2399     struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2400 
2401     if (!ep) {
2402         pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2403         goto out;
2404     }
2405     pr_debug("ep %p\n", ep);
2406     c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2407     c4iw_put_ep(&ep->com);
2408 out:
2409     return 0;
2410 }
2411 
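/*
 * accept_cr - send the CPL_PASS_ACCEPT_RPL that accepts an incoming
 * connection request.
 *
 * Encodes the TCB options (receive window, MSS index, L2T index, DDP ULP
 * mode, optional ECN/timestamps/SACK/window scaling) and, on T5 and later
 * chips, also supplies the initial send sequence number.
 */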
2412 static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2413              struct cpl_pass_accept_req *req)
2414 {
2415     struct cpl_pass_accept_rpl *rpl;
2416     unsigned int mtu_idx;
2417     u64 opt0;
2418     u32 opt2;
2419     u32 wscale;
2420     struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
2421     int win;
2422     enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
2423 
2424     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2425     cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
2426               enable_tcp_timestamps && req->tcpopt.tstamp,
2427               (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
2428     wscale = cxgb_compute_wscale(rcv_win);
2429 
2430     /*
2431      * Specify the largest window that will fit in opt0. The
2432      * remainder will be specified in the rx_data_ack.
2433      */
2434     win = ep->rcv_win >> 10;
2435     if (win > RCV_BUFSIZ_M)
2436         win = RCV_BUFSIZ_M;
2437     opt0 = (nocong ? NO_CONG_F : 0) |
2438            KEEP_ALIVE_F |
2439            DELACK_F |
2440            WND_SCALE_V(wscale) |
2441            MSS_IDX_V(mtu_idx) |
2442            L2T_IDX_V(ep->l2t->idx) |
2443            TX_CHAN_V(ep->tx_chan) |
2444            SMAC_SEL_V(ep->smac_idx) |
2445            DSCP_V(ep->tos >> 2) |
2446            ULP_MODE_V(ULP_MODE_TCPDDP) |
2447            RCV_BUFSIZ_V(win);
2448     opt2 = RX_CHANNEL_V(0) |
2449            RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
2450 
2451     if (enable_tcp_timestamps && req->tcpopt.tstamp)
2452         opt2 |= TSTAMPS_EN_F;
2453     if (enable_tcp_sack && req->tcpopt.sack)
2454         opt2 |= SACK_EN_F;
2455     if (wscale && enable_tcp_window_scaling)
2456         opt2 |= WND_SCALE_EN_F;
2457     if (enable_ecn) {
2458         const struct tcphdr *tcph;
2459         u32 hlen = ntohl(req->hdr_len);
2460 
2461         if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5)
2462             tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
2463                 IP_HDR_LEN_G(hlen);
2464         else
2465             tcph = (const void *)(req + 1) +
2466                 T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen);
2467         if (tcph->ece && tcph->cwr)
2468             opt2 |= CCTRL_ECN_V(1);
2469     }
2470 
2471     if (!is_t4(adapter_type)) {
2472         u32 isn = (prandom_u32() & ~7UL) - 1;
2473 
2474         skb = get_skb(skb, roundup(sizeof(*rpl5), 16), GFP_KERNEL);
2475         rpl5 = __skb_put_zero(skb, roundup(sizeof(*rpl5), 16));
2476         rpl = (void *)rpl5;
2477         INIT_TP_WR_CPL(rpl5, CPL_PASS_ACCEPT_RPL, ep->hwtid);
2478         opt2 |= T5_OPT_2_VALID_F;
2479         opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
2480         opt2 |= T5_ISS_F;
2481         if (peer2peer)
2482             isn += 4;
2483         rpl5->iss = cpu_to_be32(isn);
2484         pr_debug("iss %u\n", be32_to_cpu(rpl5->iss));
2485     } else {
2486         skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
2487         rpl = __skb_put_zero(skb, sizeof(*rpl));
2488         INIT_TP_WR_CPL(rpl, CPL_PASS_ACCEPT_RPL, ep->hwtid);
2489     }
2490 
2491     rpl->opt0 = cpu_to_be64(opt0);
2492     rpl->opt2 = cpu_to_be32(opt2);
2493     set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
2494     t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure);
2495 
2496     return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2497 }
2498 
2499 static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
2500 {
2501     pr_debug("c4iw_dev %p tid %u\n", dev, hwtid);
2502     skb_trim(skb, sizeof(struct cpl_tid_release));
2503     release_tid(&dev->rdev, hwtid, skb);
2504     return;
2505 }
2506 
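/*
 * pass_accept_req - handler for CPL_PASS_ACCEPT_REQ: an incoming SYN on a
 * listening endpoint.
 *
 * Validates the listener, extracts the 4-tuple, finds a route, allocates
 * and imports a child endpoint, inserts its hardware tid and sends the
 * accept reply.  Any failure along the way rejects the request by releasing
 * the tid.
 */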
2507 static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2508 {
2509     struct c4iw_ep *child_ep = NULL, *parent_ep;
2510     struct cpl_pass_accept_req *req = cplhdr(skb);
2511     unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
2512     struct tid_info *t = dev->rdev.lldi.tids;
2513     unsigned int hwtid = GET_TID(req);
2514     struct dst_entry *dst;
2515     __u8 local_ip[16], peer_ip[16];
2516     __be16 local_port, peer_port;
2517     struct sockaddr_in6 *sin6;
2518     int err;
2519     u16 peer_mss = ntohs(req->tcpopt.mss);
2520     int iptype;
2521     unsigned short hdrs;
2522     u8 tos;
2523 
2524     parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
2525     if (!parent_ep) {
2526         pr_err("%s connect request on invalid stid %d\n",
2527                __func__, stid);
2528         goto reject;
2529     }
2530 
2531     if (state_read(&parent_ep->com) != LISTEN) {
2532         pr_err("%s - listening ep not in LISTEN\n", __func__);
2533         goto reject;
2534     }
2535 
2536     if (parent_ep->com.cm_id->tos_set)
2537         tos = parent_ep->com.cm_id->tos;
2538     else
2539         tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
2540 
2541     cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
2542             &iptype, local_ip, peer_ip, &local_port, &peer_port);
2543 
2544     /* Find output route */
2545     if (iptype == 4)  {
2546         pr_debug("parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
2547              , parent_ep, hwtid,
2548              local_ip, peer_ip, ntohs(local_port),
2549              ntohs(peer_port), peer_mss);
2550         dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
2551                       *(__be32 *)local_ip, *(__be32 *)peer_ip,
2552                       local_port, peer_port, tos);
2553     } else {
2554         pr_debug("parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
2555              , parent_ep, hwtid,
2556              local_ip, peer_ip, ntohs(local_port),
2557              ntohs(peer_port), peer_mss);
2558         dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
2559                 local_ip, peer_ip, local_port, peer_port,
2560                 tos,
2561                 ((struct sockaddr_in6 *)
2562                  &parent_ep->com.local_addr)->sin6_scope_id);
2563     }
2564     if (!dst) {
2565         pr_err("%s - failed to find dst entry!\n", __func__);
2566         goto reject;
2567     }
2568 
2569     child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
2570     if (!child_ep) {
2571         pr_err("%s - failed to allocate ep entry!\n", __func__);
2572         dst_release(dst);
2573         goto reject;
2574     }
2575 
2576     err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
2577             parent_ep->com.dev->rdev.lldi.adapter_type, tos);
2578     if (err) {
2579         pr_err("%s - failed to allocate l2t entry!\n", __func__);
2580         dst_release(dst);
2581         kfree(child_ep);
2582         goto reject;
2583     }
2584 
2585     hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
2586            sizeof(struct tcphdr) +
2587            ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
2588     if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
2589         child_ep->mtu = peer_mss + hdrs;
2590 
2591     skb_queue_head_init(&child_ep->com.ep_skb_list);
2592     if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF))
2593         goto fail;
2594 
2595     state_set(&child_ep->com, CONNECTING);
2596     child_ep->com.dev = dev;
2597     child_ep->com.cm_id = NULL;
2598 
2599     if (iptype == 4) {
2600         struct sockaddr_in *sin = (struct sockaddr_in *)
2601             &child_ep->com.local_addr;
2602 
2603         sin->sin_family = AF_INET;
2604         sin->sin_port = local_port;
2605         sin->sin_addr.s_addr = *(__be32 *)local_ip;
2606 
2607         sin = (struct sockaddr_in *)&child_ep->com.local_addr;
2608         sin->sin_family = AF_INET;
2609         sin->sin_port = ((struct sockaddr_in *)
2610                  &parent_ep->com.local_addr)->sin_port;
2611         sin->sin_addr.s_addr = *(__be32 *)local_ip;
2612 
2613         sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
2614         sin->sin_family = AF_INET;
2615         sin->sin_port = peer_port;
2616         sin->sin_addr.s_addr = *(__be32 *)peer_ip;
2617     } else {
2618         sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2619         sin6->sin6_family = PF_INET6;
2620         sin6->sin6_port = local_port;
2621         memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2622 
2623         sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2624         sin6->sin6_family = PF_INET6;
2625         sin6->sin6_port = ((struct sockaddr_in6 *)
2626                    &parent_ep->com.local_addr)->sin6_port;
2627         memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2628 
2629         sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
2630         sin6->sin6_family = PF_INET6;
2631         sin6->sin6_port = peer_port;
2632         memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
2633     }
2634 
2635     c4iw_get_ep(&parent_ep->com);
2636     child_ep->parent_ep = parent_ep;
2637     child_ep->tos = tos;
2638     child_ep->dst = dst;
2639     child_ep->hwtid = hwtid;
2640 
2641     pr_debug("tx_chan %u smac_idx %u rss_qid %u\n",
2642          child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
2643 
2644     timer_setup(&child_ep->timer, ep_timeout, 0);
2645     cxgb4_insert_tid(t, child_ep, hwtid,
2646              child_ep->com.local_addr.ss_family);
2647     insert_ep_tid(child_ep);
2648     if (accept_cr(child_ep, skb, req)) {
2649         c4iw_put_ep(&parent_ep->com);
2650         release_ep_resources(child_ep);
2651     } else {
2652         set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
2653     }
2654     if (iptype == 6) {
2655         sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2656         cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
2657                    (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2658     }
2659     goto out;
2660 fail:
2661     c4iw_put_ep(&child_ep->com);
2662 reject:
2663     reject_cr(dev, hwtid, skb);
2664 out:
2665     if (parent_ep)
2666         c4iw_put_ep(&parent_ep->com);
2667     return 0;
2668 }
2669 
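/*
 * pass_establish - handler for CPL_PASS_ESTABLISH: the passive-side TCP
 * connection is fully established.
 *
 * Records the send/receive ISNs and window scale, sets the effective MSS,
 * moves the endpoint to MPA_REQ_WAIT, starts the MPA timer and sends the
 * flowc work request.
 */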
2670 static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2671 {
2672     struct c4iw_ep *ep;
2673     struct cpl_pass_establish *req = cplhdr(skb);
2674     unsigned int tid = GET_TID(req);
2675     int ret;
2676     u16 tcp_opt = ntohs(req->tcp_opt);
2677 
2678     ep = get_ep_from_tid(dev, tid);
2679     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2680     ep->snd_seq = be32_to_cpu(req->snd_isn);
2681     ep->rcv_seq = be32_to_cpu(req->rcv_isn);
2682     ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
2683 
2684     pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt);
2685 
2686     set_emss(ep, tcp_opt);
2687 
2688     dst_confirm(ep->dst);
2689     mutex_lock(&ep->com.mutex);
2690     ep->com.state = MPA_REQ_WAIT;
2691     start_ep_timer(ep);
2692     set_bit(PASS_ESTAB, &ep->com.history);
2693     ret = send_flowc(ep);
2694     mutex_unlock(&ep->com.mutex);
2695     if (ret)
2696         c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2697     c4iw_put_ep(&ep->com);
2698 
2699     return 0;
2700 }
2701 
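/*
 * peer_close - handler for CPL_PEER_CLOSE: the peer has sent a FIN.
 *
 * Drives the endpoint state machine toward CLOSING/MORIBUND/DEAD, moves the
 * QP to CLOSING or IDLE as appropriate and delivers the peer-close or
 * close-complete upcalls.
 */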
2702 static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2703 {
2704     struct cpl_peer_close *hdr = cplhdr(skb);
2705     struct c4iw_ep *ep;
2706     struct c4iw_qp_attributes attrs;
2707     int disconnect = 1;
2708     int release = 0;
2709     unsigned int tid = GET_TID(hdr);
2710     int ret;
2711 
2712     ep = get_ep_from_tid(dev, tid);
2713     if (!ep)
2714         return 0;
2715 
2716     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2717     dst_confirm(ep->dst);
2718 
2719     set_bit(PEER_CLOSE, &ep->com.history);
2720     mutex_lock(&ep->com.mutex);
2721     switch (ep->com.state) {
2722     case MPA_REQ_WAIT:
2723         __state_set(&ep->com, CLOSING);
2724         break;
2725     case MPA_REQ_SENT:
2726         __state_set(&ep->com, CLOSING);
2727         connect_reply_upcall(ep, -ECONNRESET);
2728         break;
2729     case MPA_REQ_RCVD:
2730 
2731         /*
2732          * We're gonna mark this puppy DEAD, but keep
2733          * the reference on it until the ULP accepts or
2734          * rejects the CR. Also wake up anyone waiting
2735          * in rdma connection migration (see c4iw_accept_cr()).
2736          */
2737         __state_set(&ep->com, CLOSING);
2738         pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2739         c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2740         break;
2741     case MPA_REP_SENT:
2742         __state_set(&ep->com, CLOSING);
2743         pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2744         c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2745         break;
2746     case FPDU_MODE:
2747         start_ep_timer(ep);
2748         __state_set(&ep->com, CLOSING);
2749         attrs.next_state = C4IW_QP_STATE_CLOSING;
2750         ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2751                        C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2752         if (ret != -ECONNRESET) {
2753             peer_close_upcall(ep);
2754             disconnect = 1;
2755         }
2756         break;
2757     case ABORTING:
2758         disconnect = 0;
2759         break;
2760     case CLOSING:
2761         __state_set(&ep->com, MORIBUND);
2762         disconnect = 0;
2763         break;
2764     case MORIBUND:
2765         (void)stop_ep_timer(ep);
2766         if (ep->com.cm_id && ep->com.qp) {
2767             attrs.next_state = C4IW_QP_STATE_IDLE;
2768             c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2769                        C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2770         }
2771         close_complete_upcall(ep, 0);
2772         __state_set(&ep->com, DEAD);
2773         release = 1;
2774         disconnect = 0;
2775         break;
2776     case DEAD:
2777         disconnect = 0;
2778         break;
2779     default:
2780         WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2781     }
2782     mutex_unlock(&ep->com.mutex);
2783     if (disconnect)
2784         c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2785     if (release)
2786         release_ep_resources(ep);
2787     c4iw_put_ep(&ep->com);
2788     return 0;
2789 }
2790 
2791 static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
2792 {
2793     complete_cached_srq_buffers(ep, ep->srqe_idx);
2794     if (ep->com.cm_id && ep->com.qp) {
2795         struct c4iw_qp_attributes attrs;
2796 
2797         attrs.next_state = C4IW_QP_STATE_ERROR;
2798         c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2799                    C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2800     }
2801     peer_abort_upcall(ep);
2802     release_ep_resources(ep);
2803     c4iw_put_ep(&ep->com);
2804 }
2805 
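/*
 * peer_abort - handler for CPL_ABORT_REQ_RSS: the peer has aborted the
 * connection.
 *
 * Negative advice is only counted; otherwise this wakes any waiters,
 * performs per-state cleanup (possibly deferring to read_tcb() and
 * finish_peer_abort() when an SRQ buffer is cached in the TCB), sends the
 * abort reply, and then either releases the endpoint or retries the
 * connection with MPA v1.
 */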
2806 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2807 {
2808     struct cpl_abort_req_rss6 *req = cplhdr(skb);
2809     struct c4iw_ep *ep;
2810     struct sk_buff *rpl_skb;
2811     struct c4iw_qp_attributes attrs;
2812     int ret;
2813     int release = 0;
2814     unsigned int tid = GET_TID(req);
2815     u8 status;
2816     u32 srqidx;
2817 
2818     u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
2819 
2820     ep = get_ep_from_tid(dev, tid);
2821     if (!ep)
2822         return 0;
2823 
2824     status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
2825 
2826     if (cxgb_is_neg_adv(status)) {
2827         pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
2828              ep->hwtid, status, neg_adv_str(status));
2829         ep->stats.abort_neg_adv++;
2830         mutex_lock(&dev->rdev.stats.lock);
2831         dev->rdev.stats.neg_adv++;
2832         mutex_unlock(&dev->rdev.stats.lock);
2833         goto deref_ep;
2834     }
2835 
2836     pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
2837          ep->com.state);
2838     set_bit(PEER_ABORT, &ep->com.history);
2839 
2840     /*
2841      * Wake up any threads in rdma_init() or rdma_fini().
2842      * However, this is not needed if com state is just
2843      * MPA_REQ_SENT
2844      */
2845     if (ep->com.state != MPA_REQ_SENT)
2846         c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2847 
2848     mutex_lock(&ep->com.mutex);
2849     switch (ep->com.state) {
2850     case CONNECTING:
2851         c4iw_put_ep(&ep->parent_ep->com);
2852         break;
2853     case MPA_REQ_WAIT:
2854         (void)stop_ep_timer(ep);
2855         break;
2856     case MPA_REQ_SENT:
2857         (void)stop_ep_timer(ep);
2858         if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
2859             (mpa_rev == 2 && ep->tried_with_mpa_v1))
2860             connect_reply_upcall(ep, -ECONNRESET);
2861         else {
2862             /*
2863              * We don't send a notification upwards because we
2864              * want to retry with MPA v1 without the upper layers even
2865              * knowing it.
2866              *
2867              * Do some housekeeping so as to re-initiate the
2868              * connection.
2869              */
2870             pr_info("%s: mpa_rev=%d. Retrying with mpav1\n",
2871                 __func__, mpa_rev);
2872             ep->retry_with_mpa_v1 = 1;
2873         }
2874         break;
2875     case MPA_REP_SENT:
2876         break;
2877     case MPA_REQ_RCVD:
2878         break;
2879     case MORIBUND:
2880     case CLOSING:
2881         stop_ep_timer(ep);
2882         fallthrough;
2883     case FPDU_MODE:
2884         if (ep->com.qp && ep->com.qp->srq) {
2885             srqidx = ABORT_RSS_SRQIDX_G(
2886                     be32_to_cpu(req->srqidx_status));
2887             if (srqidx) {
2888                 complete_cached_srq_buffers(ep, srqidx);
2889             } else {
2890                 /* Hold ep ref until finish_peer_abort() */
2891                 c4iw_get_ep(&ep->com);
2892                 __state_set(&ep->com, ABORTING);
2893                 set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
2894                 read_tcb(ep);
2895                 break;
2896 
2897             }
2898         }
2899 
2900         if (ep->com.cm_id && ep->com.qp) {
2901             attrs.next_state = C4IW_QP_STATE_ERROR;
2902             ret = c4iw_modify_qp(ep->com.qp->rhp,
2903                      ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
2904                      &attrs, 1);
2905             if (ret)
2906                 pr_err("%s - qp <- error failed!\n", __func__);
2907         }
2908         peer_abort_upcall(ep);
2909         break;
2910     case ABORTING:
2911         break;
2912     case DEAD:
2913         pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
2914         mutex_unlock(&ep->com.mutex);
2915         goto deref_ep;
2916     default:
2917         WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2918         break;
2919     }
2920     dst_confirm(ep->dst);
2921     if (ep->com.state != ABORTING) {
2922         __state_set(&ep->com, DEAD);
2923         /* we don't release if we want to retry with mpa_v1 */
2924         if (!ep->retry_with_mpa_v1)
2925             release = 1;
2926     }
2927     mutex_unlock(&ep->com.mutex);
2928 
2929     rpl_skb = skb_dequeue(&ep->com.ep_skb_list);
2930     if (WARN_ON(!rpl_skb)) {
2931         release = 1;
2932         goto out;
2933     }
2934 
2935     cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
2936 
2937     c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
2938 out:
2939     if (release)
2940         release_ep_resources(ep);
2941     else if (ep->retry_with_mpa_v1) {
2942         if (ep->com.remote_addr.ss_family == AF_INET6) {
2943             struct sockaddr_in6 *sin6 =
2944                     (struct sockaddr_in6 *)
2945                     &ep->com.local_addr;
2946             cxgb4_clip_release(
2947                     ep->com.dev->rdev.lldi.ports[0],
2948                     (const u32 *)&sin6->sin6_addr.s6_addr,
2949                     1);
2950         }
2951         xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid);
2952         cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
2953                  ep->com.local_addr.ss_family);
2954         dst_release(ep->dst);
2955         cxgb4_l2t_release(ep->l2t);
2956         c4iw_reconnect(ep);
2957     }
2958 
2959 deref_ep:
2960     c4iw_put_ep(&ep->com);
2961     /* Dereferencing ep, referenced in peer_abort_intr() */
2962     c4iw_put_ep(&ep->com);
2963     return 0;
2964 }
2965 
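/*
 * close_con_rpl - handler for CPL_CLOSE_CON_RPL: completion of a close we
 * initiated.
 *
 * Moves CLOSING to MORIBUND, or MORIBUND to DEAD (idling the QP and
 * signalling close-complete to the ULP before releasing the endpoint).
 */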
2966 static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2967 {
2968     struct c4iw_ep *ep;
2969     struct c4iw_qp_attributes attrs;
2970     struct cpl_close_con_rpl *rpl = cplhdr(skb);
2971     int release = 0;
2972     unsigned int tid = GET_TID(rpl);
2973 
2974     ep = get_ep_from_tid(dev, tid);
2975     if (!ep)
2976         return 0;
2977 
2978     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2979 
2980     /* The cm_id may be null if we failed to connect */
2981     mutex_lock(&ep->com.mutex);
2982     set_bit(CLOSE_CON_RPL, &ep->com.history);
2983     switch (ep->com.state) {
2984     case CLOSING:
2985         __state_set(&ep->com, MORIBUND);
2986         break;
2987     case MORIBUND:
2988         (void)stop_ep_timer(ep);
2989         if ((ep->com.cm_id) && (ep->com.qp)) {
2990             attrs.next_state = C4IW_QP_STATE_IDLE;
2991             c4iw_modify_qp(ep->com.qp->rhp,
2992                          ep->com.qp,
2993                          C4IW_QP_ATTR_NEXT_STATE,
2994                          &attrs, 1);
2995         }
2996         close_complete_upcall(ep, 0);
2997         __state_set(&ep->com, DEAD);
2998         release = 1;
2999         break;
3000     case ABORTING:
3001     case DEAD:
3002         break;
3003     default:
3004         WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3005         break;
3006     }
3007     mutex_unlock(&ep->com.mutex);
3008     if (release)
3009         release_ep_resources(ep);
3010     c4iw_put_ep(&ep->com);
3011     return 0;
3012 }
3013 
3014 static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
3015 {
3016     struct cpl_rdma_terminate *rpl = cplhdr(skb);
3017     unsigned int tid = GET_TID(rpl);
3018     struct c4iw_ep *ep;
3019     struct c4iw_qp_attributes attrs;
3020 
3021     ep = get_ep_from_tid(dev, tid);
3022 
3023     if (ep) {
3024         if (ep->com.qp) {
3025             pr_warn("TERM received tid %u qpid %u\n", tid,
3026                 ep->com.qp->wq.sq.qid);
3027             attrs.next_state = C4IW_QP_STATE_TERMINATE;
3028             c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
3029                        C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
3030         }
3031 
3032         /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3,
3033          * when entering the TERM state the RNIC MUST initiate a CLOSE.
3034          */
3035         c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
3036         c4iw_put_ep(&ep->com);
3037     } else
3038         pr_warn("TERM received tid %u no ep/qp\n", tid);
3039 
3040     return 0;
3041 }
3042 
3043 /*
3044  * Upcall from the adapter indicating data has been transmitted.
3045  * For us it's just the single MPA request or reply.  We can now free
3046  * the skb holding the mpa message.
3047  */
3048 static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
3049 {
3050     struct c4iw_ep *ep;
3051     struct cpl_fw4_ack *hdr = cplhdr(skb);
3052     u8 credits = hdr->credits;
3053     unsigned int tid = GET_TID(hdr);
3054 
3055 
3056     ep = get_ep_from_tid(dev, tid);
3057     if (!ep)
3058         return 0;
3059     pr_debug("ep %p tid %u credits %u\n",
3060          ep, ep->hwtid, credits);
3061     if (credits == 0) {
3062         pr_debug("0 credit ack ep %p tid %u state %u\n",
3063              ep, ep->hwtid, state_read(&ep->com));
3064         goto out;
3065     }
3066 
3067     dst_confirm(ep->dst);
3068     if (ep->mpa_skb) {
3069         pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n",
3070              ep, ep->hwtid, state_read(&ep->com),
3071              ep->mpa_attr.initiator ? 1 : 0);
3072         mutex_lock(&ep->com.mutex);
3073         kfree_skb(ep->mpa_skb);
3074         ep->mpa_skb = NULL;
3075         if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
3076             stop_ep_timer(ep);
3077         mutex_unlock(&ep->com.mutex);
3078     }
3079 out:
3080     c4iw_put_ep(&ep->com);
3081     return 0;
3082 }
3083 
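/*
 * c4iw_reject_cr - iw_cm reject verb: the ULP has rejected an incoming
 * connection request.  Sends an MPA reject frame (or simply aborts when
 * mpa_rev is 0) and disconnects the endpoint.
 */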
3084 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
3085 {
3086     int abort;
3087     struct c4iw_ep *ep = to_ep(cm_id);
3088 
3089     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3090 
3091     mutex_lock(&ep->com.mutex);
3092     if (ep->com.state != MPA_REQ_RCVD) {
3093         mutex_unlock(&ep->com.mutex);
3094         c4iw_put_ep(&ep->com);
3095         return -ECONNRESET;
3096     }
3097     set_bit(ULP_REJECT, &ep->com.history);
3098     if (mpa_rev == 0)
3099         abort = 1;
3100     else
3101         abort = send_mpa_reject(ep, pdata, pdata_len);
3102     mutex_unlock(&ep->com.mutex);
3103 
3104     stop_ep_timer(ep);
3105     c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
3106     c4iw_put_ep(&ep->com);
3107     return 0;
3108 }
3109 
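/*
 * c4iw_accept_cr - iw_cm accept verb: the ULP has accepted an incoming
 * connection request.
 *
 * Validates and, for MPA v2 enhanced mode, negotiates IRD/ORD against what
 * the peer advertised, binds the QP to the endpoint, moves the QP to RTS
 * and sends the MPA reply; on failure the connection is aborted.
 */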
3110 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3111 {
3112     int err;
3113     struct c4iw_qp_attributes attrs;
3114     enum c4iw_qp_attr_mask mask;
3115     struct c4iw_ep *ep = to_ep(cm_id);
3116     struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
3117     struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
3118     int abort = 0;
3119 
3120     pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3121 
3122     mutex_lock(&ep->com.mutex);
3123     if (ep->com.state != MPA_REQ_RCVD) {
3124         err = -ECONNRESET;
3125         goto err_out;
3126     }
3127 
3128     if (!qp) {
3129         err = -EINVAL;
3130         goto err_out;
3131     }
3132 
3133     set_bit(ULP_ACCEPT, &ep->com.history);
3134     if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
3135         (conn_param->ird > cur_max_read_depth(ep->com.dev))) {
3136         err = -EINVAL;
3137         goto err_abort;
3138     }
3139 
3140     if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
3141         if (conn_param->ord > ep->ird) {
3142             if (RELAXED_IRD_NEGOTIATION) {
3143                 conn_param->ord = ep->ird;
3144             } else {
3145                 ep->ird = conn_param->ird;
3146                 ep->ord = conn_param->ord;
3147                 send_mpa_reject(ep, conn_param->private_data,
3148                         conn_param->private_data_len);
3149                 err = -ENOMEM;
3150                 goto err_abort;
3151             }
3152         }
3153         if (conn_param->ird < ep->ord) {
3154             if (RELAXED_IRD_NEGOTIATION &&
3155                 ep->ord <= h->rdev.lldi.max_ordird_qp) {
3156                 conn_param->ird = ep->ord;
3157             } else {
3158                 err = -ENOMEM;
3159                 goto err_abort;
3160             }
3161         }
3162     }
3163     ep->ird = conn_param->ird;
3164     ep->ord = conn_param->ord;
3165 
3166     if (ep->mpa_attr.version == 1) {
3167         if (peer2peer && ep->ird == 0)
3168             ep->ird = 1;
3169     } else {
3170         if (peer2peer &&
3171             (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
3172             (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
3173             ep->ird = 1;
3174     }
3175 
3176     pr_debug("ird %d ord %d\n", ep->ird, ep->ord);
3177 
3178     ep->com.cm_id = cm_id;
3179     ref_cm_id(&ep->com);
3180     ep->com.qp = qp;
3181     ref_qp(ep);
3182 
3183     /* bind QP to EP and move to RTS */
3184     attrs.mpa_attr = ep->mpa_attr;
3185     attrs.max_ird = ep->ird;
3186     attrs.max_ord = ep->ord;
3187     attrs.llp_stream_handle = ep;
3188     attrs.next_state = C4IW_QP_STATE_RTS;
3189 
3190     /* bind QP and TID with INIT_WR */
3191     mask = C4IW_QP_ATTR_NEXT_STATE |
3192                  C4IW_QP_ATTR_LLP_STREAM_HANDLE |
3193                  C4IW_QP_ATTR_MPA_ATTR |
3194                  C4IW_QP_ATTR_MAX_IRD |
3195                  C4IW_QP_ATTR_MAX_ORD;
3196 
3197     err = c4iw_modify_qp(ep->com.qp->rhp,
3198                  ep->com.qp, mask, &attrs, 1);
3199     if (err)
3200         goto err_deref_cm_id;
3201 
3202     set_bit(STOP_MPA_TIMER, &ep->com.flags);
3203     err = send_mpa_reply(ep, conn_param->private_data,
3204                  conn_param->private_data_len);
3205     if (err)
3206         goto err_deref_cm_id;
3207 
3208     __state_set(&ep->com, FPDU_MODE);
3209     established_upcall(ep);
3210     mutex_unlock(&ep->com.mutex);
3211     c4iw_put_ep(&ep->com);
3212     return 0;
3213 err_deref_cm_id:
3214     deref_cm_id(&ep->com);
3215 err_abort:
3216     abort = 1;
3217 err_out:
3218     mutex_unlock(&ep->com.mutex);
3219     if (abort)
3220         c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
3221     c4iw_put_ep(&ep->com);
3222     return err;
3223 }
3224 
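/*
 * For loopback connects to the IPv4 wildcard address, pick the first
 * non-secondary address configured on port 0 of the adapter and use it
 * as both the local and the remote address of the connection.
 */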
3225 static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3226 {
3227     struct in_device *ind;
3228     int found = 0;
3229     struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3230     struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
3231     const struct in_ifaddr *ifa;
3232 
3233     ind = in_dev_get(dev->rdev.lldi.ports[0]);
3234     if (!ind)
3235         return -EADDRNOTAVAIL;
3236     rcu_read_lock();
3237     in_dev_for_each_ifa_rcu(ifa, ind) {
3238         if (ifa->ifa_flags & IFA_F_SECONDARY)
3239             continue;
3240         laddr->sin_addr.s_addr = ifa->ifa_address;
3241         raddr->sin_addr.s_addr = ifa->ifa_address;
3242         found = 1;
3243         break;
3244     }
3245     rcu_read_unlock();
3246 
3247     in_dev_put(ind);
3248     return found ? 0 : -EADDRNOTAVAIL;
3249 }
3250 
3251 static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
3252               unsigned char banned_flags)
3253 {
3254     struct inet6_dev *idev;
3255     int err = -EADDRNOTAVAIL;
3256 
3257     rcu_read_lock();
3258     idev = __in6_dev_get(dev);
3259     if (idev != NULL) {
3260         struct inet6_ifaddr *ifp;
3261 
3262         read_lock_bh(&idev->lock);
3263         list_for_each_entry(ifp, &idev->addr_list, if_list) {
3264             if (ifp->scope == IFA_LINK &&
3265                 !(ifp->flags & banned_flags)) {
3266                 memcpy(addr, &ifp->addr, 16);
3267                 err = 0;
3268                 break;
3269             }
3270         }
3271         read_unlock_bh(&idev->lock);
3272     }
3273     rcu_read_unlock();
3274     return err;
3275 }
3276 
3277 static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3278 {
3279     struct in6_addr addr;
3280     struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
3281     struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
3282 
3283     if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
3284         memcpy(la6->sin6_addr.s6_addr, &addr, 16);
3285         memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
3286         return 0;
3287     }
3288     return -EADDRNOTAVAIL;
3289 }
3290 
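/*
 * Active-open path.  Allocate an endpoint and an active TID, resolve
 * the route and L2T entry for the 4-tuple (picking local addresses for
 * wildcard loopback destinations), then hand the TCP connect request
 * to the hardware.  The labels at the end unwind the setup in reverse
 * order on failure.
 */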
3291 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3292 {
3293     struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3294     struct c4iw_ep *ep;
3295     int err = 0;
3296     struct sockaddr_in *laddr;
3297     struct sockaddr_in *raddr;
3298     struct sockaddr_in6 *laddr6;
3299     struct sockaddr_in6 *raddr6;
3300     __u8 *ra;
3301     int iptype;
3302 
3303     if ((conn_param->ord > cur_max_read_depth(dev)) ||
3304         (conn_param->ird > cur_max_read_depth(dev))) {
3305         err = -EINVAL;
3306         goto out;
3307     }
3308     ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3309     if (!ep) {
3310         pr_err("%s - cannot alloc ep\n", __func__);
3311         err = -ENOMEM;
3312         goto out;
3313     }
3314 
3315     skb_queue_head_init(&ep->com.ep_skb_list);
3316     if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) {
3317         err = -ENOMEM;
3318         goto fail1;
3319     }
3320 
3321     timer_setup(&ep->timer, ep_timeout, 0);
3322     ep->plen = conn_param->private_data_len;
3323     if (ep->plen)
3324         memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
3325                conn_param->private_data, ep->plen);
3326     ep->ird = conn_param->ird;
3327     ep->ord = conn_param->ord;
3328 
3329     if (peer2peer && ep->ord == 0)
3330         ep->ord = 1;
3331 
3332     ep->com.cm_id = cm_id;
3333     ref_cm_id(&ep->com);
3334     cm_id->provider_data = ep;
3335     ep->com.dev = dev;
3336     ep->com.qp = get_qhp(dev, conn_param->qpn);
3337     if (!ep->com.qp) {
3338         pr_warn("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
3339         err = -EINVAL;
3340         goto fail2;
3341     }
3342     ref_qp(ep);
3343     pr_debug("qpn 0x%x qp %p cm_id %p\n", conn_param->qpn,
3344          ep->com.qp, cm_id);
3345 
3346     /*
3347      * Allocate an active TID to initiate a TCP connection.
3348      */
3349     ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
3350     if (ep->atid == -1) {
3351         pr_err("%s - cannot alloc atid\n", __func__);
3352         err = -ENOMEM;
3353         goto fail2;
3354     }
3355     err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL);
3356     if (err)
3357         goto fail5;
3358 
3359     memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3360            sizeof(ep->com.local_addr));
3361     memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
3362            sizeof(ep->com.remote_addr));
3363 
3364     laddr = (struct sockaddr_in *)&ep->com.local_addr;
3365     raddr = (struct sockaddr_in *)&ep->com.remote_addr;
3366     laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3367     raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr;
3368 
3369     if (cm_id->m_remote_addr.ss_family == AF_INET) {
3370         iptype = 4;
3371         ra = (__u8 *)&raddr->sin_addr;
3372 
3373         /*
3374          * Handle loopback requests to INADDR_ANY.
3375          */
3376         if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
3377             err = pick_local_ipaddrs(dev, cm_id);
3378             if (err)
3379                 goto fail3;
3380         }
3381 
3382         /* find a route */
3383         pr_debug("saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
3384              &laddr->sin_addr, ntohs(laddr->sin_port),
3385              ra, ntohs(raddr->sin_port));
3386         ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
3387                       laddr->sin_addr.s_addr,
3388                       raddr->sin_addr.s_addr,
3389                       laddr->sin_port,
3390                       raddr->sin_port, cm_id->tos);
3391     } else {
3392         iptype = 6;
3393         ra = (__u8 *)&raddr6->sin6_addr;
3394 
3395         /*
3396          * Handle loopback requests to the IPv6 wildcard address (::).
3397          */
3398         if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
3399             err = pick_local_ip6addrs(dev, cm_id);
3400             if (err)
3401                 goto fail3;
3402         }
3403 
3404         /* find a route */
3405         pr_debug("saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
3406              laddr6->sin6_addr.s6_addr,
3407              ntohs(laddr6->sin6_port),
3408              raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
3409         ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
3410                        laddr6->sin6_addr.s6_addr,
3411                        raddr6->sin6_addr.s6_addr,
3412                        laddr6->sin6_port,
3413                        raddr6->sin6_port, cm_id->tos,
3414                        raddr6->sin6_scope_id);
3415     }
3416     if (!ep->dst) {
3417         pr_err("%s - cannot find route\n", __func__);
3418         err = -EHOSTUNREACH;
3419         goto fail3;
3420     }
3421 
3422     err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
3423             ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
3424     if (err) {
3425         pr_err("%s - cannot alloc l2e\n", __func__);
3426         goto fail4;
3427     }
3428 
3429     pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
3430          ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
3431          ep->l2t->idx);
3432 
3433     state_set(&ep->com, CONNECTING);
3434     ep->tos = cm_id->tos;
3435 
3436     /* send connect request to rnic */
3437     err = send_connect(ep);
3438     if (!err)
3439         goto out;
3440 
3441     cxgb4_l2t_release(ep->l2t);
3442 fail4:
3443     dst_release(ep->dst);
3444 fail3:
3445     xa_erase_irq(&ep->com.dev->atids, ep->atid);
3446 fail5:
3447     cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
3448 fail2:
3449     skb_queue_purge(&ep->com.ep_skb_list);
3450     deref_cm_id(&ep->com);
3451 fail1:
3452     c4iw_put_ep(&ep->com);
3453 out:
3454     return err;
3455 }
3456 
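/*
 * Create an IPv6 hardware listening server on the endpoint's stid.
 * For a specific (non-wildcard) address the CLIP table entry is
 * installed first and released again if server creation fails.
 */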
3457 static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3458 {
3459     int err;
3460     struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
3461                     &ep->com.local_addr;
3462 
3463     if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
3464         err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
3465                      (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3466         if (err)
3467             return err;
3468     }
3469     c4iw_init_wr_wait(ep->com.wr_waitp);
3470     err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
3471                    ep->stid, &sin6->sin6_addr,
3472                    sin6->sin6_port,
3473                    ep->com.dev->rdev.lldi.rxq_ids[0]);
3474     if (!err)
3475         err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3476                       ep->com.wr_waitp,
3477                       0, 0, __func__);
3478     else if (err > 0)
3479         err = net_xmit_errno(err);
3480     if (err) {
3481         cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3482                    (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3483         pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
3484                err, ep->stid,
3485                sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
3486     }
3487     return err;
3488 }
3489 
3490 static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3491 {
3492     int err;
3493     struct sockaddr_in *sin = (struct sockaddr_in *)
3494                   &ep->com.local_addr;
3495 
3496     if (dev->rdev.lldi.enable_fw_ofld_conn) {
3497         do {
3498             err = cxgb4_create_server_filter(
3499                 ep->com.dev->rdev.lldi.ports[0], ep->stid,
3500                 sin->sin_addr.s_addr, sin->sin_port, 0,
3501                 ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
3502             if (err == -EBUSY) {
3503                 if (c4iw_fatal_error(&ep->com.dev->rdev)) {
3504                     err = -EIO;
3505                     break;
3506                 }
3507                 set_current_state(TASK_UNINTERRUPTIBLE);
3508                 schedule_timeout(usecs_to_jiffies(100));
3509             }
3510         } while (err == -EBUSY);
3511     } else {
3512         c4iw_init_wr_wait(ep->com.wr_waitp);
3513         err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
3514                 ep->stid, sin->sin_addr.s_addr, sin->sin_port,
3515                 0, ep->com.dev->rdev.lldi.rxq_ids[0]);
3516         if (!err)
3517             err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3518                           ep->com.wr_waitp,
3519                           0, 0, __func__);
3520         else if (err > 0)
3521             err = net_xmit_errno(err);
3522     }
3523     if (err)
3524         pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n",
3525                err, ep->stid,
3526                &sin->sin_addr, ntohs(sin->sin_port));
3527     return err;
3528 }
3529 
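/*
 * Listen upcall from the ULP: allocate a listening endpoint and a
 * server TID (a filter-based sftid when firmware offloaded connections
 * are enabled for IPv4), then create the hardware server via
 * create_server4()/create_server6().
 */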
3530 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3531 {
3532     int err = 0;
3533     struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3534     struct c4iw_listen_ep *ep;
3535 
3536     might_sleep();
3537 
3538     ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3539     if (!ep) {
3540         pr_err("%s - cannot alloc ep\n", __func__);
3541         err = -ENOMEM;
3542         goto fail1;
3543     }
3544     skb_queue_head_init(&ep->com.ep_skb_list);
3545     pr_debug("ep %p\n", ep);
3546     ep->com.cm_id = cm_id;
3547     ref_cm_id(&ep->com);
3548     ep->com.dev = dev;
3549     ep->backlog = backlog;
3550     memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3551            sizeof(ep->com.local_addr));
3552 
3553     /*
3554      * Allocate a server TID.
3555      */
3556     if (dev->rdev.lldi.enable_fw_ofld_conn &&
3557         ep->com.local_addr.ss_family == AF_INET)
3558         ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
3559                          cm_id->m_local_addr.ss_family, ep);
3560     else
3561         ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
3562                         cm_id->m_local_addr.ss_family, ep);
3563 
3564     if (ep->stid == -1) {
3565         pr_err("%s - cannot alloc stid\n", __func__);
3566         err = -ENOMEM;
3567         goto fail2;
3568     }
3569     err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL);
3570     if (err)
3571         goto fail3;
3572 
3573     state_set(&ep->com, LISTEN);
3574     if (ep->com.local_addr.ss_family == AF_INET)
3575         err = create_server4(dev, ep);
3576     else
3577         err = create_server6(dev, ep);
3578     if (!err) {
3579         cm_id->provider_data = ep;
3580         goto out;
3581     }
3582     xa_erase_irq(&ep->com.dev->stids, ep->stid);
3583 fail3:
3584     cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3585             ep->com.local_addr.ss_family);
3586 fail2:
3587     deref_cm_id(&ep->com);
3588     c4iw_put_ep(&ep->com);
3589 fail1:
3590 out:
3591     return err;
3592 }
3593 
3594 int c4iw_destroy_listen(struct iw_cm_id *cm_id)
3595 {
3596     int err;
3597     struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
3598 
3599     pr_debug("ep %p\n", ep);
3600 
3601     might_sleep();
3602     state_set(&ep->com, DEAD);
3603     if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn &&
3604         ep->com.local_addr.ss_family == AF_INET) {
3605         err = cxgb4_remove_server_filter(
3606             ep->com.dev->rdev.lldi.ports[0], ep->stid,
3607             ep->com.dev->rdev.lldi.rxq_ids[0], false);
3608     } else {
3609         struct sockaddr_in6 *sin6;
3610         c4iw_init_wr_wait(ep->com.wr_waitp);
3611         err = cxgb4_remove_server(
3612                 ep->com.dev->rdev.lldi.ports[0], ep->stid,
3613                 ep->com.dev->rdev.lldi.rxq_ids[0],
3614                 ep->com.local_addr.ss_family == AF_INET6);
3615         if (err)
3616             goto done;
3617         err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp,
3618                       0, 0, __func__);
3619         sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3620         cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3621                    (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3622     }
3623     xa_erase_irq(&ep->com.dev->stids, ep->stid);
3624     cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3625             ep->com.local_addr.ss_family);
3626 done:
3627     deref_cm_id(&ep->com);
3628     c4iw_put_ep(&ep->com);
3629     return err;
3630 }
3631 
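/*
 * Initiate a close of the connection.  Depending on 'abrupt' and the
 * current endpoint state this sends either an abort or a half-close,
 * adjusting the endpoint state and timer accordingly.  If the send
 * fails, or the adapter is in a fatal error state, the QP is moved to
 * ERROR and the endpoint resources are released.
 */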
3632 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3633 {
3634     int ret = 0;
3635     int close = 0;
3636     int fatal = 0;
3637     struct c4iw_rdev *rdev;
3638 
3639     mutex_lock(&ep->com.mutex);
3640 
3641     pr_debug("ep %p state %s, abrupt %d\n", ep,
3642          states[ep->com.state], abrupt);
3643 
3644     /*
3645      * Ref the ep here in case we have fatal errors causing the
3646      * ep to be released and freed.
3647      */
3648     c4iw_get_ep(&ep->com);
3649 
3650     rdev = &ep->com.dev->rdev;
3651     if (c4iw_fatal_error(rdev)) {
3652         fatal = 1;
3653         close_complete_upcall(ep, -EIO);
3654         ep->com.state = DEAD;
3655     }
3656     switch (ep->com.state) {
3657     case MPA_REQ_WAIT:
3658     case MPA_REQ_SENT:
3659     case MPA_REQ_RCVD:
3660     case MPA_REP_SENT:
3661     case FPDU_MODE:
3662     case CONNECTING:
3663         close = 1;
3664         if (abrupt)
3665             ep->com.state = ABORTING;
3666         else {
3667             ep->com.state = CLOSING;
3668 
3669             /*
3670              * If we close before we see the fw4_ack(), fix up the
3671              * timer state since we're reusing the timer.
3672              */
3673             if (ep->mpa_skb &&
3674                 test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
3675                 clear_bit(STOP_MPA_TIMER, &ep->com.flags);
3676                 stop_ep_timer(ep);
3677             }
3678             start_ep_timer(ep);
3679         }
3680         set_bit(CLOSE_SENT, &ep->com.flags);
3681         break;
3682     case CLOSING:
3683         if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
3684             close = 1;
3685             if (abrupt) {
3686                 (void)stop_ep_timer(ep);
3687                 ep->com.state = ABORTING;
3688             } else
3689                 ep->com.state = MORIBUND;
3690         }
3691         break;
3692     case MORIBUND:
3693     case ABORTING:
3694     case DEAD:
3695         pr_debug("ignoring disconnect ep %p state %u\n",
3696              ep, ep->com.state);
3697         break;
3698     default:
3699         WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3700         break;
3701     }
3702 
3703     if (close) {
3704         if (abrupt) {
3705             set_bit(EP_DISC_ABORT, &ep->com.history);
3706             ret = send_abort(ep);
3707         } else {
3708             set_bit(EP_DISC_CLOSE, &ep->com.history);
3709             ret = send_halfclose(ep);
3710         }
3711         if (ret) {
3712             set_bit(EP_DISC_FAIL, &ep->com.history);
3713             if (!abrupt) {
3714                 stop_ep_timer(ep);
3715                 close_complete_upcall(ep, -EIO);
3716             }
3717             if (ep->com.qp) {
3718                 struct c4iw_qp_attributes attrs;
3719 
3720                 attrs.next_state = C4IW_QP_STATE_ERROR;
3721                 ret = c4iw_modify_qp(ep->com.qp->rhp,
3722                              ep->com.qp,
3723                              C4IW_QP_ATTR_NEXT_STATE,
3724                              &attrs, 1);
3725                 if (ret)
3726                     pr_err("%s - qp <- error failed!\n",
3727                            __func__);
3728             }
3729             fatal = 1;
3730         }
3731     }
3732     mutex_unlock(&ep->com.mutex);
3733     c4iw_put_ep(&ep->com);
3734     if (fatal)
3735         release_ep_resources(ep);
3736     return ret;
3737 }
3738 
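/*
 * Firmware reply for an active FW_OFLD_CONNECTION_WR.  FW_ENOMEM and
 * FW_EADDRINUSE are retried up to ACT_OPEN_RETRY_COUNT times; any
 * other failure is reported to the ULP via connect_reply_upcall() and
 * the atid, route and L2T state are torn down.
 */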
3739 static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3740             struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3741 {
3742     struct c4iw_ep *ep;
3743     int atid = be32_to_cpu(req->tid);
3744 
3745     ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids,
3746                        (__force u32) req->tid);
3747     if (!ep)
3748         return;
3749 
3750     switch (req->retval) {
3751     case FW_ENOMEM:
3752         set_bit(ACT_RETRY_NOMEM, &ep->com.history);
3753         if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3754             send_fw_act_open_req(ep, atid);
3755             return;
3756         }
3757         fallthrough;
3758     case FW_EADDRINUSE:
3759         set_bit(ACT_RETRY_INUSE, &ep->com.history);
3760         if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3761             send_fw_act_open_req(ep, atid);
3762             return;
3763         }
3764         break;
3765     default:
3766         pr_info("%s unexpected ofld conn wr retval %d\n",
3767                __func__, req->retval);
3768         break;
3769     }
3770     pr_err("active ofld_connect_wr failure %d atid %d\n",
3771            req->retval, atid);
3772     mutex_lock(&dev->rdev.stats.lock);
3773     dev->rdev.stats.act_ofld_conn_fails++;
3774     mutex_unlock(&dev->rdev.stats.lock);
3775     connect_reply_upcall(ep, status2errno(req->retval));
3776     state_set(&ep->com, DEAD);
3777     if (ep->com.remote_addr.ss_family == AF_INET6) {
3778         struct sockaddr_in6 *sin6 =
3779             (struct sockaddr_in6 *)&ep->com.local_addr;
3780         cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3781                    (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3782     }
3783     xa_erase_irq(&dev->atids, atid);
3784     cxgb4_free_atid(dev->rdev.lldi.tids, atid);
3785     dst_release(ep->dst);
3786     cxgb4_l2t_release(ep->l2t);
3787     c4iw_put_ep(&ep->com);
3788 }
3789 
3790 static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3791             struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3792 {
3793     struct sk_buff *rpl_skb;
3794     struct cpl_pass_accept_req *cpl;
3795     int ret;
3796 
3797     rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
3798     if (req->retval) {
3799         pr_err("%s passive open failure %d\n", __func__, req->retval);
3800         mutex_lock(&dev->rdev.stats.lock);
3801         dev->rdev.stats.pas_ofld_conn_fails++;
3802         mutex_unlock(&dev->rdev.stats.lock);
3803         kfree_skb(rpl_skb);
3804     } else {
3805         cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb);
3806         OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ,
3807                     (__force u32) htonl(
3808                     (__force u32) req->tid)));
3809         ret = pass_accept_req(dev, rpl_skb);
3810         if (!ret)
3811             kfree_skb(rpl_skb);
3812     }
3813     return;
3814 }
3815 
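/*
 * Helpers for picking fields out of the raw TCB image returned in a
 * CPL_GET_TCB_RPL.  The 32-bit TCB words arrive packed in big-endian
 * 64-bit entries in reverse word order, so TCB word 'w' is found in
 * tcb[(31 - w) / 2].  t4_tcb_get_field64() reassembles a 64-bit field
 * whose halves straddle two adjacent entries, and t4_tcb_get_field32()
 * extracts a masked, shifted field from a single TCB word.
 */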
3816 static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
3817 {
3818     u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
3819     u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
3820     u64 t;
3821     u32 shift = 32;
3822 
3823     t = (thi << shift) | (tlo >> shift);
3824 
3825     return t;
3826 }
3827 
3828 static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
3829 {
3830     u32 v;
3831     u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
3832 
3833     if (word & 0x1)
3834         shift += 32;
3835     v = (t >> shift) & mask;
3836     return v;
3837 }
3838 
3839 static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
3840 {
3841     struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
3842     __be64 *tcb = (__be64 *)(rpl + 1);
3843     unsigned int tid = GET_TID(rpl);
3844     struct c4iw_ep *ep;
3845     u64 t_flags_64;
3846     u32 rx_pdu_out;
3847 
3848     ep = get_ep_from_tid(dev, tid);
3849     if (!ep)
3850         return 0;
3851     /* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to
3852      * determine if there's a rx PDU feedback event pending.
3853      *
3854      * If that bit is set, it means we'll need to re-read the TCB's
3855      * rq_start value. The final value is the one present in a TCB
3856      * with the TF_RX_PDU_OUT bit cleared.
3857      */
3858 
3859     t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
3860     rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
3861 
3862     c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
3863     c4iw_put_ep(&ep->com); /* from read_tcb() */
3864 
3865     /* If TF_RX_PDU_OUT bit is set, re-read the TCB */
3866     if (rx_pdu_out) {
3867         if (++ep->rx_pdu_out_cnt >= 2) {
3868             WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
3869             goto cleanup;
3870         }
3871         read_tcb(ep);
3872         return 0;
3873     }
3874 
3875     ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M,
3876                       TCB_RQ_START_S);
3877 cleanup:
3878     pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
3879 
3880     if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
3881         finish_peer_abort(dev, ep);
3882     else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
3883         send_abort_req(ep);
3884     else
3885         WARN_ONCE(1, "unexpected state!");
3886 
3887     return 0;
3888 }
3889 
3890 static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3891 {
3892     struct cpl_fw6_msg *rpl = cplhdr(skb);
3893     struct cpl_fw6_msg_ofld_connection_wr_rpl *req;
3894 
3895     switch (rpl->type) {
3896     case FW6_TYPE_CQE:
3897         c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
3898         break;
3899     case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
3900         req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data;
3901         switch (req->t_state) {
3902         case TCP_SYN_SENT:
3903             active_ofld_conn_reply(dev, skb, req);
3904             break;
3905         case TCP_SYN_RECV:
3906             passive_ofld_conn_reply(dev, skb, req);
3907             break;
3908         default:
3909             pr_err("%s unexpected ofld conn wr state %d\n",
3910                    __func__, req->t_state);
3911             break;
3912         }
3913         break;
3914     }
3915     return 0;
3916 }
3917 
3918 static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid, u8 tos)
3919 {
3920     __be32 l2info;
3921     __be16 hdr_len, vlantag, len;
3922     u16 eth_hdr_len;
3923     int tcp_hdr_len, ip_hdr_len;
3924     u8 intf;
3925     struct cpl_rx_pkt *cpl = cplhdr(skb);
3926     struct cpl_pass_accept_req *req;
3927     struct tcp_options_received tmp_opt;
3928     struct c4iw_dev *dev;
3929     enum chip_type type;
3930 
3931     dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
3932     /* Store values from cpl_rx_pkt in a temporary location. */
3933     vlantag = cpl->vlan;
3934     len = cpl->len;
3935     l2info  = cpl->l2info;
3936     hdr_len = cpl->hdr_len;
3937     intf = cpl->iff;
3938 
3939     __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
3940 
3941     /*
3942      * We need to parse the TCP options from the SYN packet
3943      * to generate the cpl_pass_accept_req.
3944      */
3945     memset(&tmp_opt, 0, sizeof(tmp_opt));
3946     tcp_clear_options(&tmp_opt);
3947     tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
3948 
3949     req = __skb_push(skb, sizeof(*req));
3950     memset(req, 0, sizeof(*req));
3951     req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
3952              SYN_MAC_IDX_V(RX_MACIDX_G(
3953              be32_to_cpu(l2info))) |
3954              SYN_XACT_MATCH_F);
3955     type = dev->rdev.lldi.adapter_type;
3956     tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len));
3957     ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len));
3958     req->hdr_len =
3959         cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info))));
3960     if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) {
3961         eth_hdr_len = is_t4(type) ?
3962                 RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) :
3963                 RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
3964         req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) |
3965                         IP_HDR_LEN_V(ip_hdr_len) |
3966                         ETH_HDR_LEN_V(eth_hdr_len));
3967     } else { /* T6 and later */
3968         eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
3969         req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) |
3970                         T6_IP_HDR_LEN_V(ip_hdr_len) |
3971                         T6_ETH_HDR_LEN_V(eth_hdr_len));
3972     }
3973     req->vlan = vlantag;
3974     req->len = len;
3975     req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
3976                     PASS_OPEN_TOS_V(tos));
3977     req->tcpopt.mss = htons(tmp_opt.mss_clamp);
3978     if (tmp_opt.wscale_ok)
3979         req->tcpopt.wsf = tmp_opt.snd_wscale;
3980     req->tcpopt.tstamp = tmp_opt.saw_tstamp;
3981     if (tmp_opt.sack_ok)
3982         req->tcpopt.sack = 1;
3983     OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0));
3984     return;
3985 }
3986 
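/*
 * Send a FW_OFLD_CONNECTION_WR asking the firmware to allocate a TID
 * for a SYN that was steered to us by a filter.  The original skb is
 * stashed in the work request cookie so that the reply handler
 * (passive_ofld_conn_reply) can feed the synthesized
 * cpl_pass_accept_req back into the normal passive-open path.
 */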
3987 static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
3988                   __be32 laddr, __be16 lport,
3989                   __be32 raddr, __be16 rport,
3990                   u32 rcv_isn, u32 filter, u16 window,
3991                   u32 rss_qid, u8 port_id)
3992 {
3993     struct sk_buff *req_skb;
3994     struct fw_ofld_connection_wr *req;
3995     struct cpl_pass_accept_req *cpl = cplhdr(skb);
3996     int ret;
3997 
3998     req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
3999     if (!req_skb)
4000         return;
4001     req = __skb_put_zero(req_skb, sizeof(*req));
4002     req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
4003     req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
4004     req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
4005     req->le.filter = (__force __be32) filter;
4006     req->le.lport = lport;
4007     req->le.pport = rport;
4008     req->le.u.ipv4.lip = laddr;
4009     req->le.u.ipv4.pip = raddr;
4010     req->tcb.rcv_nxt = htonl(rcv_isn + 1);
4011     req->tcb.rcv_adv = htons(window);
4012     req->tcb.t_state_to_astid =
4013          htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) |
4014             FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) |
4015             FW_OFLD_CONNECTION_WR_ASTID_V(
4016             PASS_OPEN_TID_G(ntohl(cpl->tos_stid))));
4017 
4018     /*
4019      * We store the RSS queue id in opt2; the firmware uses it to
4020      * send us the WR response.
4021      */
4022     req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid));
4023 
4024     /*
4025      * We initialize the MSS index in the TCB to 0xF so that when
4026      * the driver sends the cpl_pass_accept_rpl, the TCB picks up
4027      * the correct value. If this were 0, TP would ignore any
4028      * value > 0 for the MSS index.
4029      */
4030     req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
4031     req->cookie = (uintptr_t)skb;
4032 
4033     set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
4034     ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
4035     if (ret < 0) {
4036         pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
4037                ret);
4038         kfree_skb(skb);
4039         kfree_skb(req_skb);
4040     }
4041 }
4042 
4043 /*
4044  * Handler for CPL_RX_PKT messages. These arrive when a filter,
4045  * rather than a server TID, is used to redirect a SYN packet:
4046  * packets that hit the filter are redirected to the offload queue
4047  * and the driver tries to establish the connection using a
4048  * firmware work request.
4049  */
4050 static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
4051 {
4052     int stid;
4053     unsigned int filter;
4054     struct ethhdr *eh = NULL;
4055     struct vlan_ethhdr *vlan_eh = NULL;
4056     struct iphdr *iph;
4057     struct tcphdr *tcph;
4058     struct rss_header *rss = (void *)skb->data;
4059     struct cpl_rx_pkt *cpl = (void *)skb->data;
4060     struct cpl_pass_accept_req *req = (void *)(rss + 1);
4061     struct l2t_entry *e;
4062     struct dst_entry *dst;
4063     struct c4iw_ep *lep = NULL;
4064     u16 window;
4065     struct port_info *pi;
4066     struct net_device *pdev;
4067     u16 rss_qid, eth_hdr_len;
4068     int step;
4069     struct neighbour *neigh;
4070 
4071     /* Drop all non-SYN packets */
4072     if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F)))
4073         goto reject;
4074 
4075     /*
4076      * Drop all packets which did not hit the filter.
4077      * Unlikely to happen.
4078      */
4079     if (!(rss->filter_hit && rss->filter_tid))
4080         goto reject;
4081 
4082     /*
4083      * Calculate the server TID from the filter hit index in cpl_rx_pkt.
4084      */
4085     stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
4086 
4087     lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
4088     if (!lep) {
4089         pr_warn("%s connect request on invalid stid %d\n",
4090             __func__, stid);
4091         goto reject;
4092     }
4093 
4094     switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) {
4095     case CHELSIO_T4:
4096         eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4097         break;
4098     case CHELSIO_T5:
4099         eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4100         break;
4101     case CHELSIO_T6:
4102         eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4103         break;
4104     default:
4105         pr_err("T%d Chip is not supported\n",
4106                CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type));
4107         goto reject;
4108     }
4109 
4110     if (eth_hdr_len == ETH_HLEN) {
4111         eh = (struct ethhdr *)(req + 1);
4112         iph = (struct iphdr *)(eh + 1);
4113     } else {
4114         vlan_eh = (struct vlan_ethhdr *)(req + 1);
4115         iph = (struct iphdr *)(vlan_eh + 1);
4116         __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
4117     }
4118 
4119     if (iph->version != 0x4)
4120         goto reject;
4121 
4122     tcph = (struct tcphdr *)(iph + 1);
4123     skb_set_network_header(skb, (void *)iph - (void *)rss);
4124     skb_set_transport_header(skb, (void *)tcph - (void *)rss);
4125     skb_get(skb);
4126 
4127     pr_debug("lip 0x%x lport %u pip 0x%x pport %u tos %d\n",
4128          ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
4129          ntohs(tcph->source), iph->tos);
4130 
4131     dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
4132                   iph->daddr, iph->saddr, tcph->dest,
4133                   tcph->source, iph->tos);
4134     if (!dst) {
4135         pr_err("%s - failed to find dst entry!\n", __func__);
4136         goto reject;
4137     }
4138     neigh = dst_neigh_lookup_skb(dst, skb);
4139 
4140     if (!neigh) {
4141         pr_err("%s - failed to allocate neigh!\n", __func__);
4142         goto free_dst;
4143     }
4144 
4145     if (neigh->dev->flags & IFF_LOOPBACK) {
4146         pdev = ip_dev_find(&init_net, iph->daddr);
4147         e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4148                     pdev, 0);
4149         pi = (struct port_info *)netdev_priv(pdev);
4150         dev_put(pdev);
4151     } else {
4152         pdev = get_real_dev(neigh->dev);
4153         e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4154                     pdev, 0);
4155         pi = (struct port_info *)netdev_priv(pdev);
4156     }
4157     neigh_release(neigh);
4158     if (!e) {
4159         pr_err("%s - failed to allocate l2t entry!\n",
4160                __func__);
4161         goto free_dst;
4162     }
4163 
4164     step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
4165     rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
4166     window = (__force u16) htons((__force u16)tcph->window);
4167 
4168     /* Calculate the filter portion for the LE region. */
4169     filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple(
4170                             dev->rdev.lldi.ports[0],
4171                             e));
4172 
4173     /*
4174      * Synthesize the cpl_pass_accept_req. We have everything except the
4175      * TID. Once firmware sends a reply with TID we update the TID field
4176      * in cpl and pass it through the regular cpl_pass_accept_req path.
4177      */
4178     build_cpl_pass_accept_req(skb, stid, iph->tos);
4179     send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr,
4180                   tcph->source, ntohl(tcph->seq), filter, window,
4181                   rss_qid, pi->port_id);
4182     cxgb4_l2t_release(e);
4183 free_dst:
4184     dst_release(dst);
4185 reject:
4186     if (lep)
4187         c4iw_put_ep(&lep->com);
4188     return 0;
4189 }
4190 
4191 /*
4192  * These are the real handlers that are called from a
4193  * work queue.
4194  */
4195 static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
4196     [CPL_ACT_ESTABLISH] = act_establish,
4197     [CPL_ACT_OPEN_RPL] = act_open_rpl,
4198     [CPL_RX_DATA] = rx_data,
4199     [CPL_ABORT_RPL_RSS] = abort_rpl,
4200     [CPL_ABORT_RPL] = abort_rpl,
4201     [CPL_PASS_OPEN_RPL] = pass_open_rpl,
4202     [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
4203     [CPL_PASS_ACCEPT_REQ] = pass_accept_req,
4204     [CPL_PASS_ESTABLISH] = pass_establish,
4205     [CPL_PEER_CLOSE] = peer_close,
4206     [CPL_ABORT_REQ_RSS] = peer_abort,
4207     [CPL_CLOSE_CON_RPL] = close_con_rpl,
4208     [CPL_RDMA_TERMINATE] = terminate,
4209     [CPL_FW4_ACK] = fw4_ack,
4210     [CPL_GET_TCB_RPL] = read_tcb_rpl,
4211     [CPL_FW6_MSG] = deferred_fw6_msg,
4212     [CPL_RX_PKT] = rx_pkt,
4213     [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
4214     [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe
4215 };
4216 
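/*
 * Handle an endpoint whose timer fired.  What gets reported upward
 * depends on the state: a timed-out active open becomes a -ETIMEDOUT
 * connect reply, a timed-out close moves the QP to ERROR and completes
 * the close, and states that are already being torn down are ignored.
 * Unless the timeout was benign, the connection is then aborted.
 */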
4217 static void process_timeout(struct c4iw_ep *ep)
4218 {
4219     struct c4iw_qp_attributes attrs;
4220     int abort = 1;
4221 
4222     mutex_lock(&ep->com.mutex);
4223     pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state);
4224     set_bit(TIMEDOUT, &ep->com.history);
4225     switch (ep->com.state) {
4226     case MPA_REQ_SENT:
4227         connect_reply_upcall(ep, -ETIMEDOUT);
4228         break;
4229     case MPA_REQ_WAIT:
4230     case MPA_REQ_RCVD:
4231     case MPA_REP_SENT:
4232     case FPDU_MODE:
4233         break;
4234     case CLOSING:
4235     case MORIBUND:
4236         if (ep->com.cm_id && ep->com.qp) {
4237             attrs.next_state = C4IW_QP_STATE_ERROR;
4238             c4iw_modify_qp(ep->com.qp->rhp,
4239                      ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
4240                      &attrs, 1);
4241         }
4242         close_complete_upcall(ep, -ETIMEDOUT);
4243         break;
4244     case ABORTING:
4245     case DEAD:
4246 
4247         /*
4248          * These states are expected if the ep timed out at the same
4249          * time as another thread was calling stop_ep_timer().
4250          * So we silently do nothing for these states.
4251          */
4252         abort = 0;
4253         break;
4254     default:
4255         WARN(1, "%s unexpected state ep %p tid %u state %u\n",
4256             __func__, ep, ep->hwtid, ep->com.state);
4257         abort = 0;
4258     }
4259     mutex_unlock(&ep->com.mutex);
4260     if (abort)
4261         c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
4262     c4iw_put_ep(&ep->com);
4263 }
4264 
4265 static void process_timedout_eps(void)
4266 {
4267     struct c4iw_ep *ep;
4268 
4269     spin_lock_irq(&timeout_lock);
4270     while (!list_empty(&timeout_list)) {
4271         struct list_head *tmp;
4272 
4273         tmp = timeout_list.next;
4274         list_del(tmp);
4275         tmp->next = NULL;
4276         tmp->prev = NULL;
4277         spin_unlock_irq(&timeout_lock);
4278         ep = list_entry(tmp, struct c4iw_ep, entry);
4279         process_timeout(ep);
4280         spin_lock_irq(&timeout_lock);
4281     }
4282     spin_unlock_irq(&timeout_lock);
4283 }
4284 
4285 static void process_work(struct work_struct *work)
4286 {
4287     struct sk_buff *skb = NULL;
4288     struct c4iw_dev *dev;
4289     struct cpl_act_establish *rpl;
4290     unsigned int opcode;
4291     int ret;
4292 
4293     process_timedout_eps();
4294     while ((skb = skb_dequeue(&rxq))) {
4295         rpl = cplhdr(skb);
4296         dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
4297         opcode = rpl->ot.opcode;
4298 
4299         if (opcode >= ARRAY_SIZE(work_handlers) ||
4300             !work_handlers[opcode]) {
4301             pr_err("No handler for opcode 0x%x.\n", opcode);
4302             kfree_skb(skb);
4303         } else {
4304             ret = work_handlers[opcode](dev, skb);
4305             if (!ret)
4306                 kfree_skb(skb);
4307         }
4308         process_timedout_eps();
4309     }
4310 }
4311 
4312 static DECLARE_WORK(skb_work, process_work);
4313 
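/*
 * Timer callback.  It runs in atomic context, so it only marks the
 * endpoint as timed out, queues it on timeout_list (if it is not
 * already there) and kicks the workqueue; the real work happens in
 * process_timeout() above.
 */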
4314 static void ep_timeout(struct timer_list *t)
4315 {
4316     struct c4iw_ep *ep = from_timer(ep, t, timer);
4317     int kickit = 0;
4318 
4319     spin_lock(&timeout_lock);
4320     if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
4321         /*
4322          * Only insert if it is not already on the list.
4323          */
4324         if (!ep->entry.next) {
4325             list_add_tail(&ep->entry, &timeout_list);
4326             kickit = 1;
4327         }
4328     }
4329     spin_unlock(&timeout_lock);
4330     if (kickit)
4331         queue_work(workq, &skb_work);
4332 }
4333 
4334 /*
4335  * All the CM events are handled on a work queue so they run in a safe (sleepable) context.
4336  */
4337 static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
4338 {
4339 
4340     /*
4341      * Save dev in the skb->cb area.
4342      */
4343     *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;
4344 
4345     /*
4346      * Queue the skb and schedule the worker thread.
4347      */
4348     skb_queue_tail(&rxq, skb);
4349     queue_work(workq, &skb_work);
4350     return 0;
4351 }
4352 
4353 static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
4354 {
4355     struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
4356 
4357     if (rpl->status != CPL_ERR_NONE) {
4358         pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
4359                rpl->status, GET_TID(rpl));
4360     }
4361     kfree_skb(skb);
4362     return 0;
4363 }
4364 
4365 static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
4366 {
4367     struct cpl_fw6_msg *rpl = cplhdr(skb);
4368     struct c4iw_wr_wait *wr_waitp;
4369     int ret;
4370 
4371     pr_debug("type %u\n", rpl->type);
4372 
4373     switch (rpl->type) {
4374     case FW6_TYPE_WR_RPL:
4375         ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
4376         wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
4377         pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret);
4378         if (wr_waitp)
4379             c4iw_wake_up_deref(wr_waitp, ret ? -ret : 0);
4380         kfree_skb(skb);
4381         break;
4382     case FW6_TYPE_CQE:
4383     case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
4384         sched(dev, skb);
4385         break;
4386     default:
4387         pr_err("%s unexpected fw6 msg type %u\n",
4388                __func__, rpl->type);
4389         kfree_skb(skb);
4390         break;
4391     }
4392     return 0;
4393 }
4394 
4395 static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
4396 {
4397     struct cpl_abort_req_rss *req = cplhdr(skb);
4398     struct c4iw_ep *ep;
4399     unsigned int tid = GET_TID(req);
4400 
4401     ep = get_ep_from_tid(dev, tid);
4402     /* The reference taken on the ep here is released in peer_abort() */
4403     if (!ep) {
4404         pr_warn("Abort on non-existent endpoint, tid %d\n", tid);
4405         kfree_skb(skb);
4406         return 0;
4407     }
4408     if (cxgb_is_neg_adv(req->status)) {
4409         pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
4410              ep->hwtid, req->status,
4411              neg_adv_str(req->status));
4412         goto out;
4413     }
4414     pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state);
4415 
4416     c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
4417 out:
4418     sched(dev, skb);
4419     return 0;
4420 }
4421 
4422 /*
4423  * Most upcalls from the T4 Core go to sched() to
4424  * schedule the processing on a work queue.
4425  */
4426 c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
4427     [CPL_ACT_ESTABLISH] = sched,
4428     [CPL_ACT_OPEN_RPL] = sched,
4429     [CPL_RX_DATA] = sched,
4430     [CPL_ABORT_RPL_RSS] = sched,
4431     [CPL_ABORT_RPL] = sched,
4432     [CPL_PASS_OPEN_RPL] = sched,
4433     [CPL_CLOSE_LISTSRV_RPL] = sched,
4434     [CPL_PASS_ACCEPT_REQ] = sched,
4435     [CPL_PASS_ESTABLISH] = sched,
4436     [CPL_PEER_CLOSE] = sched,
4437     [CPL_CLOSE_CON_RPL] = sched,
4438     [CPL_ABORT_REQ_RSS] = peer_abort_intr,
4439     [CPL_RDMA_TERMINATE] = sched,
4440     [CPL_FW4_ACK] = sched,
4441     [CPL_SET_TCB_RPL] = set_tcb_rpl,
4442     [CPL_GET_TCB_RPL] = sched,
4443     [CPL_FW6_MSG] = fw6_msg,
4444     [CPL_RX_PKT] = sched
4445 };
4446 
4447 int __init c4iw_cm_init(void)
4448 {
4449     skb_queue_head_init(&rxq);
4450 
4451     workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM);
4452     if (!workq)
4453         return -ENOMEM;
4454 
4455     return 0;
4456 }
4457 
4458 void c4iw_cm_term(void)
4459 {
4460     WARN_ON(!list_empty(&timeout_list));
4461     destroy_workqueue(workq);
4462 }