Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
0002 
0003 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
0004 /*          Kai Shen <kaishen@linux.alibaba.com> */
0005 /* Copyright (c) 2020-2022, Alibaba Group. */
0006 
0007 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
0008 /*          Fredy Neeser */
0009 /*          Greg Joyce <greg@opengridcomputing.com> */
0010 /* Copyright (c) 2008-2019, IBM Corporation */
0011 /* Copyright (c) 2017, Open Grid Computing, Inc. */
0012 
0013 #include <linux/errno.h>
0014 #include <linux/inetdevice.h>
0015 #include <linux/net.h>
0016 #include <linux/types.h>
0017 #include <linux/workqueue.h>
0018 #include <net/addrconf.h>
0019 
0020 #include <rdma/ib_user_verbs.h>
0021 #include <rdma/ib_verbs.h>
0022 
0023 #include "erdma.h"
0024 #include "erdma_cm.h"
0025 #include "erdma_verbs.h"
0026 
0027 static struct workqueue_struct *erdma_cm_wq;
0028 
0029 static void erdma_cm_llp_state_change(struct sock *sk);
0030 static void erdma_cm_llp_data_ready(struct sock *sk);
0031 static void erdma_cm_llp_error_report(struct sock *sk);
0032 
0033 static void erdma_sk_assign_cm_upcalls(struct sock *sk)
0034 {
0035     write_lock_bh(&sk->sk_callback_lock);
0036     sk->sk_state_change = erdma_cm_llp_state_change;
0037     sk->sk_data_ready = erdma_cm_llp_data_ready;
0038     sk->sk_error_report = erdma_cm_llp_error_report;
0039     write_unlock_bh(&sk->sk_callback_lock);
0040 }
0041 
0042 static void erdma_sk_save_upcalls(struct sock *sk)
0043 {
0044     struct erdma_cep *cep = sk_to_cep(sk);
0045 
0046     write_lock_bh(&sk->sk_callback_lock);
0047     cep->sk_state_change = sk->sk_state_change;
0048     cep->sk_data_ready = sk->sk_data_ready;
0049     cep->sk_error_report = sk->sk_error_report;
0050     write_unlock_bh(&sk->sk_callback_lock);
0051 }
0052 
0053 static void erdma_sk_restore_upcalls(struct sock *sk, struct erdma_cep *cep)
0054 {
0055     sk->sk_state_change = cep->sk_state_change;
0056     sk->sk_data_ready = cep->sk_data_ready;
0057     sk->sk_error_report = cep->sk_error_report;
0058     sk->sk_user_data = NULL;
0059 }
0060 
0061 static void erdma_socket_disassoc(struct socket *s)
0062 {
0063     struct sock *sk = s->sk;
0064     struct erdma_cep *cep;
0065 
0066     if (sk) {
0067         write_lock_bh(&sk->sk_callback_lock);
0068         cep = sk_to_cep(sk);
0069         if (cep) {
0070             erdma_sk_restore_upcalls(sk, cep);
0071             erdma_cep_put(cep);
0072         } else {
0073             WARN_ON_ONCE(1);
0074         }
0075         write_unlock_bh(&sk->sk_callback_lock);
0076     } else {
0077         WARN_ON_ONCE(1);
0078     }
0079 }
0080 
0081 static void erdma_cep_socket_assoc(struct erdma_cep *cep, struct socket *s)
0082 {
0083     cep->sock = s;
0084     erdma_cep_get(cep);
0085     s->sk->sk_user_data = cep;
0086 
0087     erdma_sk_save_upcalls(s->sk);
0088     erdma_sk_assign_cm_upcalls(s->sk);
0089 }
0090 
0091 static void erdma_disassoc_listen_cep(struct erdma_cep *cep)
0092 {
0093     if (cep->listen_cep) {
0094         erdma_cep_put(cep->listen_cep);
0095         cep->listen_cep = NULL;
0096     }
0097 }
0098 
0099 static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev)
0100 {
0101     struct erdma_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
0102     unsigned long flags;
0103 
0104     if (!cep)
0105         return NULL;
0106 
0107     INIT_LIST_HEAD(&cep->listenq);
0108     INIT_LIST_HEAD(&cep->devq);
0109     INIT_LIST_HEAD(&cep->work_freelist);
0110 
0111     kref_init(&cep->ref);
0112     cep->state = ERDMA_EPSTATE_IDLE;
0113     init_waitqueue_head(&cep->waitq);
0114     spin_lock_init(&cep->lock);
0115     cep->dev = dev;
0116 
0117     spin_lock_irqsave(&dev->lock, flags);
0118     list_add_tail(&cep->devq, &dev->cep_list);
0119     spin_unlock_irqrestore(&dev->lock, flags);
0120 
0121     return cep;
0122 }
0123 
0124 static void erdma_cm_free_work(struct erdma_cep *cep)
0125 {
0126     struct list_head *w, *tmp;
0127     struct erdma_cm_work *work;
0128 
0129     list_for_each_safe(w, tmp, &cep->work_freelist) {
0130         work = list_entry(w, struct erdma_cm_work, list);
0131         list_del(&work->list);
0132         kfree(work);
0133     }
0134 }
0135 
/*
 * Cancel a pending MPA/connect timeout work, if armed.
 *
 * Only when cancel_delayed_work() actually dequeued the work do we own
 * it: drop the CEP reference taken in erdma_cm_queue_work() and free the
 * work element. If cancellation raced with execution, the running
 * handler releases both itself. cep->mpa_timer is cleared either way.
 */
static void erdma_cancel_mpatimer(struct erdma_cep *cep)
{
	spin_lock_bh(&cep->lock);
	if (cep->mpa_timer) {
		if (cancel_delayed_work(&cep->mpa_timer->work)) {
			erdma_cep_put(cep);
			kfree(cep->mpa_timer);
		}
		cep->mpa_timer = NULL;
	}
	spin_unlock_bh(&cep->lock);
}
0148 
0149 static void erdma_put_work(struct erdma_cm_work *work)
0150 {
0151     INIT_LIST_HEAD(&work->list);
0152     spin_lock_bh(&work->cep->lock);
0153     list_add(&work->list, &work->cep->work_freelist);
0154     spin_unlock_bh(&work->cep->lock);
0155 }
0156 
/*
 * Acquire exclusive use of @cep (a sleeping, lock-protected flag).
 *
 * If another context holds the CEP, drop the lock and sleep on
 * cep->waitq until it is released, then re-check under the lock.
 * Signals are flushed so an interrupted wait simply retries.
 * Pairs with erdma_cep_set_free().
 */
static void erdma_cep_set_inuse(struct erdma_cep *cep)
{
	unsigned long flags;

	spin_lock_irqsave(&cep->lock, flags);
	while (cep->in_use) {
		spin_unlock_irqrestore(&cep->lock, flags);
		wait_event_interruptible(cep->waitq, !cep->in_use);
		if (signal_pending(current))
			flush_signals(current);

		/* Re-take the lock before re-testing in_use. */
		spin_lock_irqsave(&cep->lock, flags);
	}

	cep->in_use = 1;
	spin_unlock_irqrestore(&cep->lock, flags);
}
0174 
0175 static void erdma_cep_set_free(struct erdma_cep *cep)
0176 {
0177     unsigned long flags;
0178 
0179     spin_lock_irqsave(&cep->lock, flags);
0180     cep->in_use = 0;
0181     spin_unlock_irqrestore(&cep->lock, flags);
0182 
0183     wake_up(&cep->waitq);
0184 }
0185 
/*
 * kref release callback: final teardown of a CEP.
 *
 * Frees any buffered private data, drains the work freelist, unlinks
 * the CEP from the device list and frees it. A still-set listen_cep
 * here would indicate a leaked listener reference, hence the WARN.
 */
static void __erdma_cep_dealloc(struct kref *ref)
{
	struct erdma_cep *cep = container_of(ref, struct erdma_cep, ref);
	struct erdma_dev *dev = cep->dev;
	unsigned long flags;

	WARN_ON(cep->listen_cep);

	kfree(cep->private_data);
	kfree(cep->mpa.pdata);
	spin_lock_bh(&cep->lock);
	if (!list_empty(&cep->work_freelist))
		erdma_cm_free_work(cep);
	spin_unlock_bh(&cep->lock);

	/* Remove from the per-device CEP list before freeing. */
	spin_lock_irqsave(&dev->lock, flags);
	list_del(&cep->devq);
	spin_unlock_irqrestore(&dev->lock, flags);
	kfree(cep);
}
0206 
0207 static struct erdma_cm_work *erdma_get_work(struct erdma_cep *cep)
0208 {
0209     struct erdma_cm_work *work = NULL;
0210 
0211     spin_lock_bh(&cep->lock);
0212     if (!list_empty(&cep->work_freelist)) {
0213         work = list_entry(cep->work_freelist.next, struct erdma_cm_work,
0214                   list);
0215         list_del_init(&work->list);
0216     }
0217 
0218     spin_unlock_bh(&cep->lock);
0219     return work;
0220 }
0221 
0222 static int erdma_cm_alloc_work(struct erdma_cep *cep, int num)
0223 {
0224     struct erdma_cm_work *work;
0225 
0226     while (num--) {
0227         work = kmalloc(sizeof(*work), GFP_KERNEL);
0228         if (!work) {
0229             if (!(list_empty(&cep->work_freelist)))
0230                 erdma_cm_free_work(cep);
0231             return -ENOMEM;
0232         }
0233         work->cep = cep;
0234         INIT_LIST_HEAD(&work->list);
0235         list_add(&work->list, &cep->work_freelist);
0236     }
0237 
0238     return 0;
0239 }
0240 
/*
 * Deliver a CM event to the IWCM.
 *
 * For CONNECT_REQUEST the event goes to the listener's cm_id and
 * carries this CEP as provider_data; all other events use the CEP's
 * own cm_id. CONNECT_REQUEST/REPLY additionally carry any received MPA
 * private data plus the socket's local/peer addresses.
 * Returns the cm_id event handler's result.
 */
static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason,
			   int status)
{
	struct iw_cm_event event;
	struct iw_cm_id *cm_id;

	memset(&event, 0, sizeof(event));
	event.status = status;
	event.event = reason;

	if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
		event.provider_data = cep;
		cm_id = cep->listen_cep->cm_id;

		/* Advertise device limits for IRD/ORD negotiation. */
		event.ird = cep->dev->attrs.max_ird;
		event.ord = cep->dev->attrs.max_ord;
	} else {
		cm_id = cep->cm_id;
	}

	if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
	    reason == IW_CM_EVENT_CONNECT_REPLY) {
		u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);

		if (pd_len && cep->mpa.pdata) {
			event.private_data_len = pd_len;
			event.private_data = cep->mpa.pdata;
		}

		getname_local(cep->sock, &event.local_addr);
		getname_peer(cep->sock, &event.remote_addr);
	}

	return cm_id->event_handler(cm_id, &event);
}
0276 
/*
 * Tear down the connection state attached to @qp (called when the
 * application drops the QP).
 *
 * Depending on the CEP state, signal the IWCM (failed CONNECT_REPLY if
 * still awaiting the MPA reply, CLOSE if already in RDMA mode), then
 * release the cm_id, the socket and the QP<->CEP cross references.
 * Runs under the CEP in_use protection to serialize with the CM work
 * handler.
 */
void erdma_qp_cm_drop(struct erdma_qp *qp)
{
	struct erdma_cep *cep = qp->cep;

	if (!qp->cep)
		return;

	erdma_cep_set_inuse(cep);

	/* already closed. */
	if (cep->state == ERDMA_EPSTATE_CLOSED)
		goto out;

	if (cep->cm_id) {
		switch (cep->state) {
		case ERDMA_EPSTATE_AWAIT_MPAREP:
			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					-EINVAL);
			break;
		case ERDMA_EPSTATE_RDMA_MODE:
			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			break;
		case ERDMA_EPSTATE_IDLE:
		case ERDMA_EPSTATE_LISTENING:
		case ERDMA_EPSTATE_CONNECTING:
		case ERDMA_EPSTATE_AWAIT_MPAREQ:
		case ERDMA_EPSTATE_RECVD_MPAREQ:
		case ERDMA_EPSTATE_CLOSED:
		default:
			break;
		}
		/* Release the cm_id and the reference it held on the CEP. */
		cep->cm_id->rem_ref(cep->cm_id);
		cep->cm_id = NULL;
		erdma_cep_put(cep);
	}
	cep->state = ERDMA_EPSTATE_CLOSED;

	if (cep->sock) {
		erdma_socket_disassoc(cep->sock);
		sock_release(cep->sock);
		cep->sock = NULL;
	}

	/* Break the QP<->CEP association. */
	if (cep->qp) {
		cep->qp = NULL;
		erdma_qp_put(qp);
	}
out:
	erdma_cep_set_free(cep);
}
0327 
/* Drop a CEP reference; frees the CEP via __erdma_cep_dealloc() at zero. */
void erdma_cep_put(struct erdma_cep *cep)
{
	WARN_ON(kref_read(&cep->ref) < 1);
	kref_put(&cep->ref, __erdma_cep_dealloc);
}
0333 
/* Take an additional reference on @cep. */
void erdma_cep_get(struct erdma_cep *cep)
{
	kref_get(&cep->ref);
}
0338 
0339 static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata,
0340                 u8 pd_len)
0341 {
0342     struct socket *s = cep->sock;
0343     struct mpa_rr *rr = &cep->mpa.hdr;
0344     struct kvec iov[3];
0345     struct msghdr msg;
0346     int iovec_num = 0;
0347     int ret;
0348     int mpa_len;
0349 
0350     memset(&msg, 0, sizeof(msg));
0351 
0352     rr->params.pd_len = cpu_to_be16(pd_len);
0353 
0354     iov[iovec_num].iov_base = rr;
0355     iov[iovec_num].iov_len = sizeof(*rr);
0356     iovec_num++;
0357     mpa_len = sizeof(*rr);
0358 
0359     iov[iovec_num].iov_base = &cep->mpa.ext_data;
0360     iov[iovec_num].iov_len = sizeof(cep->mpa.ext_data);
0361     iovec_num++;
0362     mpa_len += sizeof(cep->mpa.ext_data);
0363 
0364     if (pd_len) {
0365         iov[iovec_num].iov_base = (char *)pdata;
0366         iov[iovec_num].iov_len = pd_len;
0367         mpa_len += pd_len;
0368         iovec_num++;
0369     }
0370 
0371     ret = kernel_sendmsg(s, &msg, iov, iovec_num, mpa_len);
0372 
0373     return ret < 0 ? ret : 0;
0374 }
0375 
0376 static inline int ksock_recv(struct socket *sock, char *buf, size_t size,
0377                  int flags)
0378 {
0379     struct kvec iov = { buf, size };
0380     struct msghdr msg = { .msg_name = NULL, .msg_flags = flags };
0381 
0382     return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
0383 }
0384 
0385 static int __recv_mpa_hdr(struct erdma_cep *cep, int hdr_rcvd, char *hdr,
0386               int hdr_size, int *rcvd_out)
0387 {
0388     struct socket *s = cep->sock;
0389     int rcvd;
0390 
0391     *rcvd_out = 0;
0392     if (hdr_rcvd < hdr_size) {
0393         rcvd = ksock_recv(s, hdr + hdr_rcvd, hdr_size - hdr_rcvd,
0394                   MSG_DONTWAIT);
0395         if (rcvd == -EAGAIN)
0396             return -EAGAIN;
0397 
0398         if (rcvd <= 0)
0399             return -ECONNABORTED;
0400 
0401         hdr_rcvd += rcvd;
0402         *rcvd_out = rcvd;
0403 
0404         if (hdr_rcvd < hdr_size)
0405             return -EAGAIN;
0406     }
0407 
0408     return 0;
0409 }
0410 
0411 static void __mpa_rr_set_revision(__be16 *bits, u8 rev)
0412 {
0413     *bits = (*bits & ~MPA_RR_MASK_REVISION) |
0414         (cpu_to_be16(rev) & MPA_RR_MASK_REVISION);
0415 }
0416 
0417 static u8 __mpa_rr_revision(__be16 mpa_rr_bits)
0418 {
0419     __be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION;
0420 
0421     return (u8)be16_to_cpu(rev);
0422 }
0423 
0424 static void __mpa_ext_set_cc(__be32 *bits, u32 cc)
0425 {
0426     *bits = (*bits & ~MPA_EXT_FLAG_CC) |
0427         (cpu_to_be32(cc) & MPA_EXT_FLAG_CC);
0428 }
0429 
0430 static u8 __mpa_ext_cc(__be32 mpa_ext_bits)
0431 {
0432     __be32 cc = mpa_ext_bits & MPA_EXT_FLAG_CC;
0433 
0434     return (u8)be32_to_cpu(cc);
0435 }
0436 
/*
 * Receive MPA Request/Reply header.
 *
 * Returns 0 if complete MPA Request/Reply header including
 * eventual private data was received. Returns -EAGAIN if
 * header was partially received or negative error code otherwise.
 *
 * Context: May be called in process context only
 */
static int erdma_recv_mpa_rr(struct erdma_cep *cep)
{
	struct mpa_rr *hdr = &cep->mpa.hdr;
	struct socket *s = cep->sock;
	u16 pd_len;
	int rcvd, to_rcv, ret, pd_rcvd;

	/* Stage 1: fixed MPA header. bytes_rcvd tracks total progress
	 * across repeated invocations of this function.
	 */
	if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
		ret = __recv_mpa_hdr(cep, cep->mpa.bytes_rcvd,
				     (char *)&cep->mpa.hdr,
				     sizeof(struct mpa_rr), &rcvd);
		cep->mpa.bytes_rcvd += rcvd;
		if (ret)
			return ret;
	}

	/* Only MPA_REVISION_EXT_1 with a bounded private-data length
	 * is accepted.
	 */
	if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA ||
	    __mpa_rr_revision(hdr->params.bits) != MPA_REVISION_EXT_1)
		return -EPROTO;

	/* Stage 2: erdma extension data following the fixed header. */
	if (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) <
	    sizeof(struct erdma_mpa_ext)) {
		ret = __recv_mpa_hdr(
			cep, cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
			(char *)&cep->mpa.ext_data,
			sizeof(struct erdma_mpa_ext), &rcvd);
		cep->mpa.bytes_rcvd += rcvd;
		if (ret)
			return ret;
	}

	pd_len = be16_to_cpu(hdr->params.pd_len);
	pd_rcvd = cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) -
		  sizeof(struct erdma_mpa_ext);
	to_rcv = pd_len - pd_rcvd;

	if (!to_rcv) {
		/*
		 * We have received the whole MPA Request/Reply message.
		 * Check against peer protocol violation.
		 */
		u32 word;

		/* Peek for trailing bytes: any extra data is a
		 * protocol violation; a clean -EAGAIN with nothing
		 * read means the message ended exactly here.
		 */
		ret = __recv_mpa_hdr(cep, 0, (char *)&word, sizeof(word),
				     &rcvd);
		if (ret == -EAGAIN && rcvd == 0)
			return 0;

		if (ret)
			return ret;

		return -EPROTO;
	}

	/*
	 * At this point, MPA header has been fully received, and pd_len != 0.
	 * So, begin to receive private data.
	 */
	if (!cep->mpa.pdata) {
		/* +4 spare bytes allow detecting over-long payloads below. */
		cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
		if (!cep->mpa.pdata)
			return -ENOMEM;
	}

	rcvd = ksock_recv(s, cep->mpa.pdata + pd_rcvd, to_rcv + 4,
			  MSG_DONTWAIT);
	if (rcvd < 0)
		return rcvd;

	/* More data than announced in pd_len: protocol violation. */
	if (rcvd > to_rcv)
		return -EPROTO;

	cep->mpa.bytes_rcvd += rcvd;

	if (to_rcv == rcvd)
		return 0;

	return -EAGAIN;
}
0525 
/*
 * erdma_proc_mpareq()
 *
 * Read MPA Request from socket and signal new connection to IWCM
 * if success. Caller must hold lock on corresponding listening CEP.
 *
 * Returns 0 on success, -EAGAIN while the request is incomplete,
 * -EPROTO on a malformed request, -EOPNOTSUPP when the peer asked for
 * unsupported markers/CRC (a reject reply is sent in that case).
 */
static int erdma_proc_mpareq(struct erdma_cep *cep)
{
	struct mpa_rr *req;
	int ret;

	ret = erdma_recv_mpa_rr(cep);
	if (ret)
		return ret;

	req = &cep->mpa.hdr;

	if (memcmp(req->key, MPA_KEY_REQ, MPA_KEY_SIZE))
		return -EPROTO;

	/* Turn the received header into a reply template. */
	memcpy(req->key, MPA_KEY_REP, MPA_KEY_SIZE);

	/* Currently does not support marker and crc. */
	if (req->params.bits & MPA_RR_FLAG_MARKERS ||
	    req->params.bits & MPA_RR_FLAG_CRC)
		goto reject_conn;

	cep->state = ERDMA_EPSTATE_RECVD_MPAREQ;

	/* Keep reference until IWCM accepts/rejects */
	erdma_cep_get(cep);
	ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
	if (ret)
		erdma_cep_put(cep);

	return ret;

reject_conn:
	/* Echo a reject reply without markers/CRC and no private data. */
	req->params.bits &= ~MPA_RR_FLAG_MARKERS;
	req->params.bits |= MPA_RR_FLAG_REJECT;
	req->params.bits &= ~MPA_RR_FLAG_CRC;

	kfree(cep->mpa.pdata);
	cep->mpa.pdata = NULL;
	erdma_send_mpareqrep(cep, NULL, 0);

	return -EOPNOTSUPP;
}
0574 
/*
 * Process the peer's MPA reply on the active side: validate the reply,
 * move the QP to RTS and deliver CONNECT_REPLY to the IWCM.
 *
 * Returns 0 when the reply was fully handled (even if the IWCM upcall
 * failed), -EAGAIN while the reply is incomplete, or a negative error
 * after signaling a failed CONNECT_REPLY.
 */
static int erdma_proc_mpareply(struct erdma_cep *cep)
{
	struct erdma_qp_attrs qp_attrs;
	struct erdma_qp *qp = cep->qp;
	struct mpa_rr *rep;
	int ret;

	ret = erdma_recv_mpa_rr(cep);
	if (ret)
		goto out_err;

	/* Full reply received: the MPA timeout is no longer needed. */
	erdma_cancel_mpatimer(cep);

	rep = &cep->mpa.hdr;

	if (memcmp(rep->key, MPA_KEY_REP, MPA_KEY_SIZE)) {
		ret = -EPROTO;
		goto out_err;
	}

	/* Peer explicitly rejected the connection. */
	if (rep->params.bits & MPA_RR_FLAG_REJECT) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);
		return -ECONNRESET;
	}

	/* Currently does not support marker and crc. */
	if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
	    (rep->params.bits & MPA_RR_FLAG_CRC)) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
		return -EINVAL;
	}

	memset(&qp_attrs, 0, sizeof(qp_attrs));
	qp_attrs.irq_size = cep->ird;
	qp_attrs.orq_size = cep->ord;
	qp_attrs.state = ERDMA_QP_STATE_RTS;

	/* Move socket RX/TX under QP control */
	down_write(&qp->state_lock);
	if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto out_err;
	}

	qp->attrs.qp_type = ERDMA_QP_ACTIVE;
	/* Fall back to the compromise CC if the peer's choice differs. */
	if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc)
		qp->attrs.cc = COMPROMISE_CC;

	ret = erdma_modify_qp_internal(qp, &qp_attrs,
				       ERDMA_QP_ATTR_STATE |
				       ERDMA_QP_ATTR_LLP_HANDLE |
				       ERDMA_QP_ATTR_MPA);

	up_write(&qp->state_lock);

	if (!ret) {
		ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
		if (!ret)
			cep->state = ERDMA_EPSTATE_RDMA_MODE;

		return 0;
	}

out_err:
	/* -EAGAIN just means "come back with more data"; real errors
	 * are reported to the IWCM as a failed connect.
	 */
	if (ret != -EAGAIN)
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);

	return ret;
}
0644 
0645 static void erdma_accept_newconn(struct erdma_cep *cep)
0646 {
0647     struct socket *s = cep->sock;
0648     struct socket *new_s = NULL;
0649     struct erdma_cep *new_cep = NULL;
0650     int ret = 0;
0651 
0652     if (cep->state != ERDMA_EPSTATE_LISTENING)
0653         goto error;
0654 
0655     new_cep = erdma_cep_alloc(cep->dev);
0656     if (!new_cep)
0657         goto error;
0658 
0659     /*
0660      * 4: Allocate a sufficient number of work elements
0661      * to allow concurrent handling of local + peer close
0662      * events, MPA header processing + MPA timeout.
0663      */
0664     if (erdma_cm_alloc_work(new_cep, 4) != 0)
0665         goto error;
0666 
0667     /*
0668      * Copy saved socket callbacks from listening CEP
0669      * and assign new socket with new CEP
0670      */
0671     new_cep->sk_state_change = cep->sk_state_change;
0672     new_cep->sk_data_ready = cep->sk_data_ready;
0673     new_cep->sk_error_report = cep->sk_error_report;
0674 
0675     ret = kernel_accept(s, &new_s, O_NONBLOCK);
0676     if (ret != 0)
0677         goto error;
0678 
0679     new_cep->sock = new_s;
0680     erdma_cep_get(new_cep);
0681     new_s->sk->sk_user_data = new_cep;
0682 
0683     tcp_sock_set_nodelay(new_s->sk);
0684     new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ;
0685 
0686     ret = erdma_cm_queue_work(new_cep, ERDMA_CM_WORK_MPATIMEOUT);
0687     if (ret)
0688         goto error;
0689 
0690     new_cep->listen_cep = cep;
0691     erdma_cep_get(cep);
0692 
0693     if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
0694         /* MPA REQ already queued */
0695         erdma_cep_set_inuse(new_cep);
0696         ret = erdma_proc_mpareq(new_cep);
0697         if (ret != -EAGAIN) {
0698             erdma_cep_put(cep);
0699             new_cep->listen_cep = NULL;
0700             if (ret) {
0701                 erdma_cep_set_free(new_cep);
0702                 goto error;
0703             }
0704         }
0705         erdma_cep_set_free(new_cep);
0706     }
0707     return;
0708 
0709 error:
0710     if (new_cep) {
0711         new_cep->state = ERDMA_EPSTATE_CLOSED;
0712         erdma_cancel_mpatimer(new_cep);
0713 
0714         erdma_cep_put(new_cep);
0715         new_cep->sock = NULL;
0716     }
0717 
0718     if (new_s) {
0719         erdma_socket_disassoc(new_s);
0720         sock_release(new_s);
0721     }
0722 }
0723 
0724 static int erdma_newconn_connected(struct erdma_cep *cep)
0725 {
0726     int ret = 0;
0727 
0728     cep->mpa.hdr.params.bits = 0;
0729     __mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);
0730 
0731     memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
0732     cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
0733     __mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);
0734 
0735     ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
0736     cep->state = ERDMA_EPSTATE_AWAIT_MPAREP;
0737     cep->mpa.hdr.params.pd_len = 0;
0738 
0739     if (ret >= 0)
0740         ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_MPATIMEOUT);
0741 
0742     return ret;
0743 }
0744 
/*
 * Central CM work handler: executes all deferred connection-management
 * events (connect completion, timeouts, accept, MPA header processing,
 * local/peer close) for one CEP, serialized by the CEP in_use flag.
 *
 * When an event terminates the connection, release_cep drives the
 * common teardown: cancel timers, close the QP, disassociate and
 * release the socket, and drop the cm_id. The work element and the
 * reference taken in erdma_cm_queue_work() are released at the end.
 */
static void erdma_cm_work_handler(struct work_struct *w)
{
	struct erdma_cm_work *work;
	struct erdma_cep *cep;
	int release_cep = 0, ret = 0;

	work = container_of(w, struct erdma_cm_work, work.work);
	cep = work->cep;

	erdma_cep_set_inuse(cep);

	switch (work->type) {
	case ERDMA_CM_WORK_CONNECTED:
		/* TCP connect finished before the connect timeout fired. */
		erdma_cancel_mpatimer(cep);
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			ret = erdma_newconn_connected(cep);
			if (ret) {
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-EIO);
				release_cep = 1;
			}
		}
		break;
	case ERDMA_CM_WORK_CONNECTTIMEOUT:
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			cep->mpa_timer = NULL;
			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					-ETIMEDOUT);
			release_cep = 1;
		}
		break;
	case ERDMA_CM_WORK_ACCEPT:
		erdma_accept_newconn(cep);
		break;
	case ERDMA_CM_WORK_READ_MPAHDR:
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			if (cep->listen_cep) {
				/* Take the listener to keep its state
				 * stable while processing the request.
				 */
				erdma_cep_set_inuse(cep->listen_cep);

				if (cep->listen_cep->state ==
				    ERDMA_EPSTATE_LISTENING)
					ret = erdma_proc_mpareq(cep);
				else
					ret = -EFAULT;

				erdma_cep_set_free(cep->listen_cep);

				/* Anything but -EAGAIN finishes the
				 * listener association; on error also
				 * drop the extra MPA-wait reference.
				 */
				if (ret != -EAGAIN) {
					erdma_cep_put(cep->listen_cep);
					cep->listen_cep = NULL;
					if (ret)
						erdma_cep_put(cep);
				}
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			ret = erdma_proc_mpareply(cep);
		}

		if (ret && ret != -EAGAIN)
			release_cep = 1;
		break;
	case ERDMA_CM_WORK_CLOSE_LLP:
		if (cep->cm_id)
			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_PEER_CLOSE:
		if (cep->cm_id) {
			if (cep->state == ERDMA_EPSTATE_CONNECTING ||
			    cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
				/*
				 * MPA reply not received, but connection drop
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ECONNRESET);
			} else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) {
				/*
				 * NOTE: IW_CM_EVENT_DISCONNECT is given just
				 *       to transition IWCM into CLOSING.
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
				erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* Socket close before MPA request received. */
			erdma_disassoc_listen_cep(cep);
			erdma_cep_put(cep);
		}
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_MPATIMEOUT:
		cep->mpa_timer = NULL;
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			/*
			 * MPA request timed out:
			 * Hide any partially received private data and signal
			 * timeout
			 */
			cep->mpa.hdr.params.pd_len = 0;

			if (cep->cm_id)
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ETIMEDOUT);
			release_cep = 1;
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* No MPA req received after peer TCP stream setup. */
			erdma_disassoc_listen_cep(cep);

			erdma_cep_put(cep);
			release_cep = 1;
		}
		break;
	default:
		WARN(1, "Undefined CM work type: %d\n", work->type);
	}

	if (release_cep) {
		erdma_cancel_mpatimer(cep);
		cep->state = ERDMA_EPSTATE_CLOSED;
		if (cep->qp) {
			struct erdma_qp *qp = cep->qp;
			/*
			 * Serialize a potential race with application
			 * closing the QP and calling erdma_qp_cm_drop()
			 */
			erdma_qp_get(qp);
			erdma_cep_set_free(cep);

			erdma_qp_llp_close(qp);
			erdma_qp_put(qp);

			/* Re-take the CEP before touching its QP link. */
			erdma_cep_set_inuse(cep);
			cep->qp = NULL;
			erdma_qp_put(qp);
		}

		if (cep->sock) {
			erdma_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
			if (cep->state != ERDMA_EPSTATE_LISTENING)
				erdma_cep_put(cep);
		}
	}
	erdma_cep_set_free(cep);
	erdma_put_work(work);
	erdma_cep_put(cep);
}
0898 
/*
 * Queue a CM event of @type for @cep on the CM workqueue.
 *
 * Takes a work element from the CEP's preallocated freelist and a CEP
 * reference that the work handler drops when done. Timeout-type work
 * (MPA or connect) is recorded in cep->mpa_timer and queued with the
 * corresponding delay; all other work runs immediately.
 * Returns 0 on success, -ENOMEM if the freelist is exhausted.
 */
int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type)
{
	struct erdma_cm_work *work = erdma_get_work(cep);
	unsigned long delay = 0;

	if (!work)
		return -ENOMEM;

	work->type = type;
	work->cep = cep;

	/* Reference released by erdma_cm_work_handler(). */
	erdma_cep_get(cep);

	INIT_DELAYED_WORK(&work->work, erdma_cm_work_handler);

	if (type == ERDMA_CM_WORK_MPATIMEOUT) {
		cep->mpa_timer = work;

		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
			delay = MPAREP_TIMEOUT;
		else
			delay = MPAREQ_TIMEOUT;
	} else if (type == ERDMA_CM_WORK_CONNECTTIMEOUT) {
		cep->mpa_timer = work;

		delay = CONNECT_TIMEOUT;
	}

	queue_delayed_work(erdma_cm_wq, &work->work, delay);

	return 0;
}
0931 
0932 static void erdma_cm_llp_data_ready(struct sock *sk)
0933 {
0934     struct erdma_cep *cep;
0935 
0936     read_lock(&sk->sk_callback_lock);
0937 
0938     cep = sk_to_cep(sk);
0939     if (!cep)
0940         goto out;
0941 
0942     if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ ||
0943         cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
0944         erdma_cm_queue_work(cep, ERDMA_CM_WORK_READ_MPAHDR);
0945 
0946 out:
0947     read_unlock(&sk->sk_callback_lock);
0948 }
0949 
0950 static void erdma_cm_llp_error_report(struct sock *sk)
0951 {
0952     struct erdma_cep *cep = sk_to_cep(sk);
0953 
0954     if (cep)
0955         cep->sk_error_report(sk);
0956 }
0957 
/*
 * Socket state_change upcall: translate TCP state transitions into CM
 * work. ESTABLISHED triggers CONNECTED (active side) or ACCEPT
 * (listening side); CLOSE/CLOSE_WAIT triggers PEER_CLOSE for
 * non-listening CEPs. The original state_change upcall is chained
 * after releasing the callback lock.
 */
static void erdma_cm_llp_state_change(struct sock *sk)
{
	struct erdma_cep *cep;
	void (*orig_state_change)(struct sock *sk);

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep) {
		read_unlock(&sk->sk_callback_lock);
		return;
	}
	/* Capture while holding the lock; called after unlock below. */
	orig_state_change = cep->sk_state_change;

	switch (sk->sk_state) {
	case TCP_ESTABLISHED:
		if (cep->state == ERDMA_EPSTATE_CONNECTING)
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
		else
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_ACCEPT);
		break;
	case TCP_CLOSE:
	case TCP_CLOSE_WAIT:
		if (cep->state != ERDMA_EPSTATE_LISTENING)
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_PEER_CLOSE);
		break;
	default:
		break;
	}
	read_unlock(&sk->sk_callback_lock);
	orig_state_change(sk);
}
0990 
0991 static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
0992                   int laddrlen, struct sockaddr *raddr,
0993                   int raddrlen, int flags)
0994 {
0995     int ret;
0996 
0997     sock_set_reuseaddr(s->sk);
0998     ret = s->ops->bind(s, laddr, laddrlen);
0999     if (ret)
1000         return ret;
1001     ret = s->ops->connect(s, raddr, raddrlen, flags);
1002     return ret < 0 ? ret : 0;
1003 }
1004 
/*
 * erdma_connect() - iw_cm "connect" upcall, active (client) side.
 *
 * Validates the connection parameters, creates a non-blocking TCP
 * socket, allocates a CEP and cross-links it with the QP and the cm_id,
 * then bind/connects to the peer.  MPA negotiation completes
 * asynchronously: depending on the connect() result either a CONNECTED
 * or a CONNECTTIMEOUT work item is queued on the CM workqueue.
 *
 * Returns 0 on success (setup continues asynchronously via the CM
 * workqueue), or a negative errno.  On error, every association made so
 * far is undone by the fall-through goto chain at the bottom; note the
 * unwind labels must stay in reverse order of the setup steps.
 */
int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
    struct erdma_dev *dev = to_edev(id->device);
    struct erdma_qp *qp;
    struct erdma_cep *cep = NULL;
    struct socket *s = NULL;
    struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr;
    struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr;
    u16 pd_len = params->private_data_len;
    int ret;

    /* Private data must fit in the MPA request frame. */
    if (pd_len > MPA_MAX_PRIVDATA)
        return -EINVAL;

    if (params->ird > dev->attrs.max_ird ||
        params->ord > dev->attrs.max_ord)
        return -EINVAL;

    /* Only IPv4 endpoints are supported here. */
    if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET)
        return -EAFNOSUPPORT;

    qp = find_qp_by_qpn(dev, params->qpn);
    if (!qp)
        return -ENOENT;
    erdma_qp_get(qp);

    ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
    if (ret < 0)
        goto error_put_qp;

    cep = erdma_cep_alloc(dev);
    if (!cep) {
        ret = -ENOMEM;
        goto error_release_sock;
    }

    /* Mark the CEP busy; released via erdma_cep_set_free() below. */
    erdma_cep_set_inuse(cep);

    /* Associate QP with CEP */
    erdma_cep_get(cep);
    qp->cep = cep;
    cep->qp = qp;

    /* Associate cm_id with CEP */
    id->add_ref(id);
    cep->cm_id = id;

    /*
     * 6: Allocate a sufficient number of work elements
     * to allow concurrent handling of local + peer close
     * events, MPA header processing + MPA timeout, connected event
     * and connect timeout.
     */
    ret = erdma_cm_alloc_work(cep, 6);
    if (ret != 0) {
        ret = -ENOMEM;
        goto error_release_cep;
    }

    cep->ird = params->ird;
    cep->ord = params->ord;
    cep->state = ERDMA_EPSTATE_CONNECTING;

    erdma_cep_socket_assoc(cep, s);

    /* Stash a private copy of the caller's MPA private data. */
    if (pd_len) {
        cep->pd_len = pd_len;
        cep->private_data = kmalloc(pd_len, GFP_KERNEL);
        if (!cep->private_data) {
            ret = -ENOMEM;
            goto error_disassoc;
        }

        memcpy(cep->private_data, params->private_data,
               params->private_data_len);
    }

    /*
     * Non-blocking connect: 0 means the 3-way handshake already
     * completed, -EINPROGRESS means it is still in flight and the
     * socket state-change upcall will finish the job (guarded by
     * the CONNECTTIMEOUT work item).
     */
    ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr,
                 sizeof(*raddr), O_NONBLOCK);
    if (ret != -EINPROGRESS && ret != 0) {
        goto error_disassoc;
    } else if (ret == 0) {
        ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
        if (ret)
            goto error_disassoc;
    } else {
        ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTTIMEOUT);
        if (ret)
            goto error_disassoc;
    }

    erdma_cep_set_free(cep);
    return 0;

error_disassoc:
    kfree(cep->private_data);
    cep->private_data = NULL;
    cep->pd_len = 0;

    erdma_socket_disassoc(s);

error_release_cep:
    /* disassoc with cm_id */
    cep->cm_id = NULL;
    id->rem_ref(id);

    /* disassoc with qp */
    qp->cep = NULL;
    erdma_cep_put(cep);
    cep->qp = NULL;

    cep->state = ERDMA_EPSTATE_CLOSED;

    erdma_cep_set_free(cep);

    /* release the cep. */
    erdma_cep_put(cep);

error_release_sock:
    if (s)
        sock_release(s);
error_put_qp:
    erdma_qp_put(qp);

    return ret;
}
1131 
1132 int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
1133 {
1134     struct erdma_dev *dev = to_edev(id->device);
1135     struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
1136     struct erdma_qp *qp;
1137     struct erdma_qp_attrs qp_attrs;
1138     int ret;
1139 
1140     erdma_cep_set_inuse(cep);
1141     erdma_cep_put(cep);
1142 
1143     /* Free lingering inbound private data */
1144     if (cep->mpa.hdr.params.pd_len) {
1145         cep->mpa.hdr.params.pd_len = 0;
1146         kfree(cep->mpa.pdata);
1147         cep->mpa.pdata = NULL;
1148     }
1149     erdma_cancel_mpatimer(cep);
1150 
1151     if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
1152         erdma_cep_set_free(cep);
1153         erdma_cep_put(cep);
1154 
1155         return -ECONNRESET;
1156     }
1157 
1158     qp = find_qp_by_qpn(dev, params->qpn);
1159     if (!qp)
1160         return -ENOENT;
1161     erdma_qp_get(qp);
1162 
1163     down_write(&qp->state_lock);
1164     if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
1165         ret = -EINVAL;
1166         up_write(&qp->state_lock);
1167         goto error;
1168     }
1169 
1170     if (params->ord > dev->attrs.max_ord ||
1171         params->ird > dev->attrs.max_ord) {
1172         ret = -EINVAL;
1173         up_write(&qp->state_lock);
1174         goto error;
1175     }
1176 
1177     if (params->private_data_len > MPA_MAX_PRIVDATA) {
1178         ret = -EINVAL;
1179         up_write(&qp->state_lock);
1180         goto error;
1181     }
1182 
1183     cep->ird = params->ird;
1184     cep->ord = params->ord;
1185 
1186     cep->cm_id = id;
1187     id->add_ref(id);
1188 
1189     memset(&qp_attrs, 0, sizeof(qp_attrs));
1190     qp_attrs.orq_size = params->ord;
1191     qp_attrs.irq_size = params->ird;
1192 
1193     qp_attrs.state = ERDMA_QP_STATE_RTS;
1194 
1195     /* Associate QP with CEP */
1196     erdma_cep_get(cep);
1197     qp->cep = cep;
1198     cep->qp = qp;
1199 
1200     cep->state = ERDMA_EPSTATE_RDMA_MODE;
1201 
1202     qp->attrs.qp_type = ERDMA_QP_PASSIVE;
1203     qp->attrs.pd_len = params->private_data_len;
1204 
1205     if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits))
1206         qp->attrs.cc = COMPROMISE_CC;
1207 
1208     /* move to rts */
1209     ret = erdma_modify_qp_internal(qp, &qp_attrs,
1210                        ERDMA_QP_ATTR_STATE |
1211                        ERDMA_QP_ATTR_ORD |
1212                        ERDMA_QP_ATTR_LLP_HANDLE |
1213                        ERDMA_QP_ATTR_IRD |
1214                        ERDMA_QP_ATTR_MPA);
1215     up_write(&qp->state_lock);
1216 
1217     if (ret)
1218         goto error;
1219 
1220     cep->mpa.ext_data.bits = 0;
1221     __mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
1222     cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
1223 
1224     ret = erdma_send_mpareqrep(cep, params->private_data,
1225                    params->private_data_len);
1226     if (!ret) {
1227         ret = erdma_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
1228         if (ret)
1229             goto error;
1230 
1231         erdma_cep_set_free(cep);
1232 
1233         return 0;
1234     }
1235 
1236 error:
1237     erdma_socket_disassoc(cep->sock);
1238     sock_release(cep->sock);
1239     cep->sock = NULL;
1240 
1241     cep->state = ERDMA_EPSTATE_CLOSED;
1242 
1243     if (cep->cm_id) {
1244         cep->cm_id->rem_ref(id);
1245         cep->cm_id = NULL;
1246     }
1247 
1248     if (qp->cep) {
1249         erdma_cep_put(cep);
1250         qp->cep = NULL;
1251     }
1252 
1253     cep->qp = NULL;
1254     erdma_qp_put(qp);
1255 
1256     erdma_cep_set_free(cep);
1257     erdma_cep_put(cep);
1258 
1259     return ret;
1260 }
1261 
/*
 * erdma_reject() - iw_cm "reject" upcall, passive side.
 *
 * Declines a pending MPA connection request.  For MPA extended
 * revision peers an explicit MPA reply carrying the reject flag (and
 * optional private data @pdata/@plen) is sent before tearing the
 * socket down; otherwise the TCP connection is simply closed.
 *
 * Returns 0, or -ECONNRESET if the CEP is no longer in the
 * RECVD_MPAREQ state (e.g. peer already closed).
 */
int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen)
{
    struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;

    /* Take exclusive ownership of the CEP, drop the provider_data ref. */
    erdma_cep_set_inuse(cep);
    erdma_cep_put(cep);

    erdma_cancel_mpatimer(cep);

    if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
        erdma_cep_set_free(cep);
        erdma_cep_put(cep);

        return -ECONNRESET;
    }

    if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) {
        cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
        erdma_send_mpareqrep(cep, pdata, plen);
    }

    erdma_socket_disassoc(cep->sock);
    sock_release(cep->sock);
    cep->sock = NULL;

    cep->state = ERDMA_EPSTATE_CLOSED;

    /* Drop the in-use flag and the final CEP reference. */
    erdma_cep_set_free(cep);
    erdma_cep_put(cep);

    return 0;
}
1294 
/*
 * erdma_create_listen() - iw_cm "create listen" upcall.
 *
 * Creates a listening TCP socket bound to the cm_id's local IPv4
 * address and associates it with a freshly allocated CEP.  The cm_id's
 * provider_data holds a list head so one iw_cm listener can fan out to
 * multiple listening CEPs (wildcard listen on a multi-homed device);
 * the list is allocated on first use and torn down in
 * erdma_destroy_listen().
 *
 * @backlog also sizes the CEP work-element pool, so each pending
 * connection can get its own CM work item.
 *
 * Returns 0 on success or a negative errno; the shared error label
 * unwinds whatever subset of (socket, CEP, cm_id ref) was set up.
 */
int erdma_create_listen(struct iw_cm_id *id, int backlog)
{
    struct socket *s;
    struct erdma_cep *cep = NULL;
    int ret = 0;
    struct erdma_dev *dev = to_edev(id->device);
    int addr_family = id->local_addr.ss_family;
    struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);

    if (addr_family != AF_INET)
        return -EAFNOSUPPORT;

    ret = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
    if (ret < 0)
        return ret;

    sock_set_reuseaddr(s->sk);

    /* For wildcard addr, limit binding to current device only */
    if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
        s->sk->sk_bound_dev_if = dev->netdev->ifindex;

    ret = s->ops->bind(s, (struct sockaddr *)laddr,
               sizeof(struct sockaddr_in));
    if (ret)
        goto error;

    cep = erdma_cep_alloc(dev);
    if (!cep) {
        ret = -ENOMEM;
        goto error;
    }
    erdma_cep_socket_assoc(cep, s);

    ret = erdma_cm_alloc_work(cep, backlog);
    if (ret)
        goto error;

    ret = s->ops->listen(s, backlog);
    if (ret)
        goto error;

    cep->cm_id = id;
    id->add_ref(id);

    /* First listener on this cm_id: allocate the CEP list head. */
    if (!id->provider_data) {
        id->provider_data =
            kmalloc(sizeof(struct list_head), GFP_KERNEL);
        if (!id->provider_data) {
            ret = -ENOMEM;
            goto error;
        }
        INIT_LIST_HEAD((struct list_head *)id->provider_data);
    }

    list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
    cep->state = ERDMA_EPSTATE_LISTENING;

    return 0;

error:
    if (cep) {
        erdma_cep_set_inuse(cep);

        if (cep->cm_id) {
            cep->cm_id->rem_ref(cep->cm_id);
            cep->cm_id = NULL;
        }
        /* Clear cep->sock first: the CEP must not point at the
         * socket we are about to disassociate and release below.
         */
        cep->sock = NULL;
        erdma_socket_disassoc(s);
        cep->state = ERDMA_EPSTATE_CLOSED;

        erdma_cep_set_free(cep);
        erdma_cep_put(cep);
    }
    sock_release(s);

    return ret;
}
1374 
1375 static void erdma_drop_listeners(struct iw_cm_id *id)
1376 {
1377     struct list_head *p, *tmp;
1378     /*
1379      * In case of a wildcard rdma_listen on a multi-homed device,
1380      * a listener's IWCM id is associated with more than one listening CEP.
1381      */
1382     list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
1383         struct erdma_cep *cep =
1384             list_entry(p, struct erdma_cep, listenq);
1385 
1386         list_del(p);
1387 
1388         erdma_cep_set_inuse(cep);
1389 
1390         if (cep->cm_id) {
1391             cep->cm_id->rem_ref(cep->cm_id);
1392             cep->cm_id = NULL;
1393         }
1394         if (cep->sock) {
1395             erdma_socket_disassoc(cep->sock);
1396             sock_release(cep->sock);
1397             cep->sock = NULL;
1398         }
1399         cep->state = ERDMA_EPSTATE_CLOSED;
1400         erdma_cep_set_free(cep);
1401         erdma_cep_put(cep);
1402     }
1403 }
1404 
1405 int erdma_destroy_listen(struct iw_cm_id *id)
1406 {
1407     if (!id->provider_data)
1408         return 0;
1409 
1410     erdma_drop_listeners(id);
1411     kfree(id->provider_data);
1412     id->provider_data = NULL;
1413 
1414     return 0;
1415 }
1416 
1417 int erdma_cm_init(void)
1418 {
1419     erdma_cm_wq = create_singlethread_workqueue("erdma_cm_wq");
1420     if (!erdma_cm_wq)
1421         return -ENOMEM;
1422 
1423     return 0;
1424 }
1425 
1426 void erdma_cm_exit(void)
1427 {
1428     if (erdma_cm_wq)
1429         destroy_workqueue(erdma_cm_wq);
1430 }