Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
0004  *
0005  *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
0006  *  applies to SOCK_STREAM sockets only
0007  *  offers an alternative communication option for TCP-protocol sockets
0008  *  applicable with RoCE-cards only
0009  *
0010  *  Initial restrictions:
0011  *    - support for alternate links postponed
0012  *
0013  *  Copyright IBM Corp. 2016, 2018
0014  *
0015  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
0016  *              based on prototype from Frank Blaschka
0017  */
0018 
0019 #define KMSG_COMPONENT "smc"
0020 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
0021 
0022 #include <linux/module.h>
0023 #include <linux/socket.h>
0024 #include <linux/workqueue.h>
0025 #include <linux/in.h>
0026 #include <linux/sched/signal.h>
0027 #include <linux/if_vlan.h>
0028 #include <linux/rcupdate_wait.h>
0029 #include <linux/ctype.h>
0030 
0031 #include <net/sock.h>
0032 #include <net/tcp.h>
0033 #include <net/smc.h>
0034 #include <asm/ioctls.h>
0035 
0036 #include <net/net_namespace.h>
0037 #include <net/netns/generic.h>
0038 #include "smc_netns.h"
0039 
0040 #include "smc.h"
0041 #include "smc_clc.h"
0042 #include "smc_llc.h"
0043 #include "smc_cdc.h"
0044 #include "smc_core.h"
0045 #include "smc_ib.h"
0046 #include "smc_ism.h"
0047 #include "smc_pnet.h"
0048 #include "smc_netlink.h"
0049 #include "smc_tx.h"
0050 #include "smc_rx.h"
0051 #include "smc_close.h"
0052 #include "smc_stats.h"
0053 #include "smc_tracepoint.h"
0054 #include "smc_sysctl.h"
0055 
/* serialize link group creation on server */
static DEFINE_MUTEX(smc_server_lgr_pending);
/* serialize link group creation on client */
static DEFINE_MUTEX(smc_client_lgr_pending);

/* wq for tcp listen work */
static struct workqueue_struct *smc_tcp_ls_wq;
/* wq for handshake work */
struct workqueue_struct *smc_hs_wq;
/* wq for close work */
struct workqueue_struct *smc_close_wq;

/* work handlers defined further down in this file */
static void smc_connect_work(struct work_struct *);
static void smc_tcp_listen_work(struct work_struct *);
0069 
0070 int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb)
0071 {
0072     struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
0073     void *hdr;
0074 
0075     if (cb_ctx->pos[0])
0076         goto out;
0077 
0078     hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
0079               &smc_gen_nl_family, NLM_F_MULTI,
0080               SMC_NETLINK_DUMP_HS_LIMITATION);
0081     if (!hdr)
0082         return -ENOMEM;
0083 
0084     if (nla_put_u8(skb, SMC_NLA_HS_LIMITATION_ENABLED,
0085                sock_net(skb->sk)->smc.limit_smc_hs))
0086         goto err;
0087 
0088     genlmsg_end(skb, hdr);
0089     cb_ctx->pos[0] = 1;
0090 out:
0091     return skb->len;
0092 err:
0093     genlmsg_cancel(skb, hdr);
0094     return -EMSGSIZE;
0095 }
0096 
0097 int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
0098 {
0099     sock_net(skb->sk)->smc.limit_smc_hs = true;
0100     return 0;
0101 }
0102 
0103 int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
0104 {
0105     sock_net(skb->sk)->smc.limit_smc_hs = false;
0106     return 0;
0107 }
0108 
0109 static void smc_set_keepalive(struct sock *sk, int val)
0110 {
0111     struct smc_sock *smc = smc_sk(sk);
0112 
0113     smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
0114 }
0115 
0116 static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk,
0117                       struct sk_buff *skb,
0118                       struct request_sock *req,
0119                       struct dst_entry *dst,
0120                       struct request_sock *req_unhash,
0121                       bool *own_req)
0122 {
0123     struct smc_sock *smc;
0124     struct sock *child;
0125 
0126     smc = smc_clcsock_user_data(sk);
0127 
0128     if (READ_ONCE(sk->sk_ack_backlog) + atomic_read(&smc->queued_smc_hs) >
0129                 sk->sk_max_ack_backlog)
0130         goto drop;
0131 
0132     if (sk_acceptq_is_full(&smc->sk)) {
0133         NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
0134         goto drop;
0135     }
0136 
0137     /* passthrough to original syn recv sock fct */
0138     child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash,
0139                            own_req);
0140     /* child must not inherit smc or its ops */
0141     if (child) {
0142         rcu_assign_sk_user_data(child, NULL);
0143 
0144         /* v4-mapped sockets don't inherit parent ops. Don't restore. */
0145         if (inet_csk(child)->icsk_af_ops == inet_csk(sk)->icsk_af_ops)
0146             inet_csk(child)->icsk_af_ops = smc->ori_af_ops;
0147     }
0148     return child;
0149 
0150 drop:
0151     dst_release(dst);
0152     tcp_listendrop(sk);
0153     return NULL;
0154 }
0155 
0156 static bool smc_hs_congested(const struct sock *sk)
0157 {
0158     const struct smc_sock *smc;
0159 
0160     smc = smc_clcsock_user_data(sk);
0161 
0162     if (!smc)
0163         return true;
0164 
0165     if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq))
0166         return true;
0167 
0168     return false;
0169 }
0170 
0171 static struct smc_hashinfo smc_v4_hashinfo = {
0172     .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
0173 };
0174 
0175 static struct smc_hashinfo smc_v6_hashinfo = {
0176     .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
0177 };
0178 
0179 int smc_hash_sk(struct sock *sk)
0180 {
0181     struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
0182     struct hlist_head *head;
0183 
0184     head = &h->ht;
0185 
0186     write_lock_bh(&h->lock);
0187     sk_add_node(sk, head);
0188     write_unlock_bh(&h->lock);
0189     sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
0190 
0191     return 0;
0192 }
0193 EXPORT_SYMBOL_GPL(smc_hash_sk);
0194 
0195 void smc_unhash_sk(struct sock *sk)
0196 {
0197     struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
0198 
0199     write_lock_bh(&h->lock);
0200     if (sk_del_node_init(sk))
0201         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
0202     write_unlock_bh(&h->lock);
0203 }
0204 EXPORT_SYMBOL_GPL(smc_unhash_sk);
0205 
0206 /* This will be called before user really release sock_lock. So do the
0207  * work which we didn't do because of user hold the sock_lock in the
0208  * BH context
0209  */
0210 static void smc_release_cb(struct sock *sk)
0211 {
0212     struct smc_sock *smc = smc_sk(sk);
0213 
0214     if (smc->conn.tx_in_release_sock) {
0215         smc_tx_pending(&smc->conn);
0216         smc->conn.tx_in_release_sock = false;
0217     }
0218 }
0219 
0220 struct proto smc_proto = {
0221     .name       = "SMC",
0222     .owner      = THIS_MODULE,
0223     .keepalive  = smc_set_keepalive,
0224     .hash       = smc_hash_sk,
0225     .unhash     = smc_unhash_sk,
0226     .release_cb = smc_release_cb,
0227     .obj_size   = sizeof(struct smc_sock),
0228     .h.smc_hash = &smc_v4_hashinfo,
0229     .slab_flags = SLAB_TYPESAFE_BY_RCU,
0230 };
0231 EXPORT_SYMBOL_GPL(smc_proto);
0232 
0233 struct proto smc_proto6 = {
0234     .name       = "SMC6",
0235     .owner      = THIS_MODULE,
0236     .keepalive  = smc_set_keepalive,
0237     .hash       = smc_hash_sk,
0238     .unhash     = smc_unhash_sk,
0239     .release_cb = smc_release_cb,
0240     .obj_size   = sizeof(struct smc_sock),
0241     .h.smc_hash = &smc_v6_hashinfo,
0242     .slab_flags = SLAB_TYPESAFE_BY_RCU,
0243 };
0244 EXPORT_SYMBOL_GPL(smc_proto6);
0245 
0246 static void smc_fback_restore_callbacks(struct smc_sock *smc)
0247 {
0248     struct sock *clcsk = smc->clcsock->sk;
0249 
0250     write_lock_bh(&clcsk->sk_callback_lock);
0251     clcsk->sk_user_data = NULL;
0252 
0253     smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change);
0254     smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready);
0255     smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space);
0256     smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report);
0257 
0258     write_unlock_bh(&clcsk->sk_callback_lock);
0259 }
0260 
0261 static void smc_restore_fallback_changes(struct smc_sock *smc)
0262 {
0263     if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
0264         smc->clcsock->file->private_data = smc->sk.sk_socket;
0265         smc->clcsock->file = NULL;
0266         smc_fback_restore_callbacks(smc);
0267     }
0268 }
0269 
0270 static int __smc_release(struct smc_sock *smc)
0271 {
0272     struct sock *sk = &smc->sk;
0273     int rc = 0;
0274 
0275     if (!smc->use_fallback) {
0276         rc = smc_close_active(smc);
0277         sock_set_flag(sk, SOCK_DEAD);
0278         sk->sk_shutdown |= SHUTDOWN_MASK;
0279     } else {
0280         if (sk->sk_state != SMC_CLOSED) {
0281             if (sk->sk_state != SMC_LISTEN &&
0282                 sk->sk_state != SMC_INIT)
0283                 sock_put(sk); /* passive closing */
0284             if (sk->sk_state == SMC_LISTEN) {
0285                 /* wake up clcsock accept */
0286                 rc = kernel_sock_shutdown(smc->clcsock,
0287                               SHUT_RDWR);
0288             }
0289             sk->sk_state = SMC_CLOSED;
0290             sk->sk_state_change(sk);
0291         }
0292         smc_restore_fallback_changes(smc);
0293     }
0294 
0295     sk->sk_prot->unhash(sk);
0296 
0297     if (sk->sk_state == SMC_CLOSED) {
0298         if (smc->clcsock) {
0299             release_sock(sk);
0300             smc_clcsock_release(smc);
0301             lock_sock(sk);
0302         }
0303         if (!smc->use_fallback)
0304             smc_conn_free(&smc->conn);
0305     }
0306 
0307     return rc;
0308 }
0309 
0310 static int smc_release(struct socket *sock)
0311 {
0312     struct sock *sk = sock->sk;
0313     struct smc_sock *smc;
0314     int old_state, rc = 0;
0315 
0316     if (!sk)
0317         goto out;
0318 
0319     sock_hold(sk); /* sock_put below */
0320     smc = smc_sk(sk);
0321 
0322     old_state = sk->sk_state;
0323 
0324     /* cleanup for a dangling non-blocking connect */
0325     if (smc->connect_nonblock && old_state == SMC_INIT)
0326         tcp_abort(smc->clcsock->sk, ECONNABORTED);
0327 
0328     if (cancel_work_sync(&smc->connect_work))
0329         sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */
0330 
0331     if (sk->sk_state == SMC_LISTEN)
0332         /* smc_close_non_accepted() is called and acquires
0333          * sock lock for child sockets again
0334          */
0335         lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
0336     else
0337         lock_sock(sk);
0338 
0339     if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
0340         !smc->use_fallback)
0341         smc_close_active_abort(smc);
0342 
0343     rc = __smc_release(smc);
0344 
0345     /* detach socket */
0346     sock_orphan(sk);
0347     sock->sk = NULL;
0348     release_sock(sk);
0349 
0350     sock_put(sk); /* sock_hold above */
0351     sock_put(sk); /* final sock_put */
0352 out:
0353     return rc;
0354 }
0355 
0356 static void smc_destruct(struct sock *sk)
0357 {
0358     if (sk->sk_state != SMC_CLOSED)
0359         return;
0360     if (!sock_flag(sk, SOCK_DEAD))
0361         return;
0362 
0363     sk_refcnt_debug_dec(sk);
0364 }
0365 
0366 static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
0367                    int protocol)
0368 {
0369     struct smc_sock *smc;
0370     struct proto *prot;
0371     struct sock *sk;
0372 
0373     prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
0374     sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
0375     if (!sk)
0376         return NULL;
0377 
0378     sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
0379     sk->sk_state = SMC_INIT;
0380     sk->sk_destruct = smc_destruct;
0381     sk->sk_protocol = protocol;
0382     smc = smc_sk(sk);
0383     INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
0384     INIT_WORK(&smc->connect_work, smc_connect_work);
0385     INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
0386     INIT_LIST_HEAD(&smc->accept_q);
0387     spin_lock_init(&smc->accept_q_lock);
0388     spin_lock_init(&smc->conn.send_lock);
0389     sk->sk_prot->hash(sk);
0390     sk_refcnt_debug_inc(sk);
0391     mutex_init(&smc->clcsock_release_lock);
0392     smc_init_saved_callbacks(smc);
0393 
0394     return sk;
0395 }
0396 
0397 static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
0398             int addr_len)
0399 {
0400     struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
0401     struct sock *sk = sock->sk;
0402     struct smc_sock *smc;
0403     int rc;
0404 
0405     smc = smc_sk(sk);
0406 
0407     /* replicate tests from inet_bind(), to be safe wrt. future changes */
0408     rc = -EINVAL;
0409     if (addr_len < sizeof(struct sockaddr_in))
0410         goto out;
0411 
0412     rc = -EAFNOSUPPORT;
0413     if (addr->sin_family != AF_INET &&
0414         addr->sin_family != AF_INET6 &&
0415         addr->sin_family != AF_UNSPEC)
0416         goto out;
0417     /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
0418     if (addr->sin_family == AF_UNSPEC &&
0419         addr->sin_addr.s_addr != htonl(INADDR_ANY))
0420         goto out;
0421 
0422     lock_sock(sk);
0423 
0424     /* Check if socket is already active */
0425     rc = -EINVAL;
0426     if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
0427         goto out_rel;
0428 
0429     smc->clcsock->sk->sk_reuse = sk->sk_reuse;
0430     rc = kernel_bind(smc->clcsock, uaddr, addr_len);
0431 
0432 out_rel:
0433     release_sock(sk);
0434 out:
0435     return rc;
0436 }
0437 
0438 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
0439                    unsigned long mask)
0440 {
0441     /* options we don't get control via setsockopt for */
0442     nsk->sk_type = osk->sk_type;
0443     nsk->sk_sndbuf = osk->sk_sndbuf;
0444     nsk->sk_rcvbuf = osk->sk_rcvbuf;
0445     nsk->sk_sndtimeo = osk->sk_sndtimeo;
0446     nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
0447     nsk->sk_mark = osk->sk_mark;
0448     nsk->sk_priority = osk->sk_priority;
0449     nsk->sk_rcvlowat = osk->sk_rcvlowat;
0450     nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
0451     nsk->sk_err = osk->sk_err;
0452 
0453     nsk->sk_flags &= ~mask;
0454     nsk->sk_flags |= osk->sk_flags & mask;
0455 }
0456 
0457 #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
0458                  (1UL << SOCK_KEEPOPEN) | \
0459                  (1UL << SOCK_LINGER) | \
0460                  (1UL << SOCK_BROADCAST) | \
0461                  (1UL << SOCK_TIMESTAMP) | \
0462                  (1UL << SOCK_DBG) | \
0463                  (1UL << SOCK_RCVTSTAMP) | \
0464                  (1UL << SOCK_RCVTSTAMPNS) | \
0465                  (1UL << SOCK_LOCALROUTE) | \
0466                  (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
0467                  (1UL << SOCK_RXQ_OVFL) | \
0468                  (1UL << SOCK_WIFI_STATUS) | \
0469                  (1UL << SOCK_NOFCS) | \
0470                  (1UL << SOCK_FILTER_LOCKED) | \
0471                  (1UL << SOCK_TSTAMP_NEW))
0472 /* copy only relevant settings and flags of SOL_SOCKET level from smc to
0473  * clc socket (since smc is not called for these options from net/core)
0474  */
0475 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
0476 {
0477     smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
0478 }
0479 
0480 #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
0481                  (1UL << SOCK_KEEPOPEN) | \
0482                  (1UL << SOCK_LINGER) | \
0483                  (1UL << SOCK_DBG))
0484 /* copy only settings and flags relevant for smc from clc to smc socket */
0485 static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
0486 {
0487     smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
0488 }
0489 
0490 /* register the new vzalloced sndbuf on all links */
0491 static int smcr_lgr_reg_sndbufs(struct smc_link *link,
0492                 struct smc_buf_desc *snd_desc)
0493 {
0494     struct smc_link_group *lgr = link->lgr;
0495     int i, rc = 0;
0496 
0497     if (!snd_desc->is_vm)
0498         return -EINVAL;
0499 
0500     /* protect against parallel smcr_link_reg_buf() */
0501     mutex_lock(&lgr->llc_conf_mutex);
0502     for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
0503         if (!smc_link_active(&lgr->lnk[i]))
0504             continue;
0505         rc = smcr_link_reg_buf(&lgr->lnk[i], snd_desc);
0506         if (rc)
0507             break;
0508     }
0509     mutex_unlock(&lgr->llc_conf_mutex);
0510     return rc;
0511 }
0512 
0513 /* register the new rmb on all links */
0514 static int smcr_lgr_reg_rmbs(struct smc_link *link,
0515                  struct smc_buf_desc *rmb_desc)
0516 {
0517     struct smc_link_group *lgr = link->lgr;
0518     int i, rc = 0;
0519 
0520     rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
0521     if (rc)
0522         return rc;
0523     /* protect against parallel smc_llc_cli_rkey_exchange() and
0524      * parallel smcr_link_reg_buf()
0525      */
0526     mutex_lock(&lgr->llc_conf_mutex);
0527     for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
0528         if (!smc_link_active(&lgr->lnk[i]))
0529             continue;
0530         rc = smcr_link_reg_buf(&lgr->lnk[i], rmb_desc);
0531         if (rc)
0532             goto out;
0533     }
0534 
0535     /* exchange confirm_rkey msg with peer */
0536     rc = smc_llc_do_confirm_rkey(link, rmb_desc);
0537     if (rc) {
0538         rc = -EFAULT;
0539         goto out;
0540     }
0541     rmb_desc->is_conf_rkey = true;
0542 out:
0543     mutex_unlock(&lgr->llc_conf_mutex);
0544     smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
0545     return rc;
0546 }
0547 
0548 static int smcr_clnt_conf_first_link(struct smc_sock *smc)
0549 {
0550     struct smc_link *link = smc->conn.lnk;
0551     struct smc_llc_qentry *qentry;
0552     int rc;
0553 
0554     /* receive CONFIRM LINK request from server over RoCE fabric */
0555     qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
0556                   SMC_LLC_CONFIRM_LINK);
0557     if (!qentry) {
0558         struct smc_clc_msg_decline dclc;
0559 
0560         rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
0561                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
0562         return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
0563     }
0564     smc_llc_save_peer_uid(qentry);
0565     rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
0566     smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
0567     if (rc)
0568         return SMC_CLC_DECL_RMBE_EC;
0569 
0570     rc = smc_ib_modify_qp_rts(link);
0571     if (rc)
0572         return SMC_CLC_DECL_ERR_RDYLNK;
0573 
0574     smc_wr_remember_qp_attr(link);
0575 
0576     /* reg the sndbuf if it was vzalloced */
0577     if (smc->conn.sndbuf_desc->is_vm) {
0578         if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
0579             return SMC_CLC_DECL_ERR_REGBUF;
0580     }
0581 
0582     /* reg the rmb */
0583     if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
0584         return SMC_CLC_DECL_ERR_REGBUF;
0585 
0586     /* confirm_rkey is implicit on 1st contact */
0587     smc->conn.rmb_desc->is_conf_rkey = true;
0588 
0589     /* send CONFIRM LINK response over RoCE fabric */
0590     rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
0591     if (rc < 0)
0592         return SMC_CLC_DECL_TIMEOUT_CL;
0593 
0594     smc_llc_link_active(link);
0595     smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
0596 
0597     /* optional 2nd link, receive ADD LINK request from server */
0598     qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
0599                   SMC_LLC_ADD_LINK);
0600     if (!qentry) {
0601         struct smc_clc_msg_decline dclc;
0602 
0603         rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
0604                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
0605         if (rc == -EAGAIN)
0606             rc = 0; /* no DECLINE received, go with one link */
0607         return rc;
0608     }
0609     smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
0610     smc_llc_cli_add_link(link, qentry);
0611     return 0;
0612 }
0613 
0614 static bool smc_isascii(char *hostname)
0615 {
0616     int i;
0617 
0618     for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
0619         if (!isascii(hostname[i]))
0620             return false;
0621     return true;
0622 }
0623 
0624 static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
0625                     struct smc_clc_msg_accept_confirm *clc)
0626 {
0627     struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
0628         (struct smc_clc_msg_accept_confirm_v2 *)clc;
0629     struct smc_clc_first_contact_ext *fce;
0630     int clc_v2_len;
0631 
0632     if (clc->hdr.version == SMC_V1 ||
0633         !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
0634         return;
0635 
0636     if (smc->conn.lgr->is_smcd) {
0637         memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
0638                SMC_MAX_EID_LEN);
0639         clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
0640                      d1);
0641     } else {
0642         memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
0643                SMC_MAX_EID_LEN);
0644         clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
0645                      r1);
0646     }
0647     fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
0648     smc->conn.lgr->peer_os = fce->os_type;
0649     smc->conn.lgr->peer_smc_release = fce->release;
0650     if (smc_isascii(fce->hostname))
0651         memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
0652                SMC_MAX_HOSTNAME_LEN);
0653 }
0654 
0655 static void smcr_conn_save_peer_info(struct smc_sock *smc,
0656                      struct smc_clc_msg_accept_confirm *clc)
0657 {
0658     int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size);
0659 
0660     smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx;
0661     smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token);
0662     smc->conn.peer_rmbe_size = bufsize;
0663     atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
0664     smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
0665 }
0666 
0667 static void smcd_conn_save_peer_info(struct smc_sock *smc,
0668                      struct smc_clc_msg_accept_confirm *clc)
0669 {
0670     int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);
0671 
0672     smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
0673     smc->conn.peer_token = clc->d0.token;
0674     /* msg header takes up space in the buffer */
0675     smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
0676     atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
0677     smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
0678 }
0679 
0680 static void smc_conn_save_peer_info(struct smc_sock *smc,
0681                     struct smc_clc_msg_accept_confirm *clc)
0682 {
0683     if (smc->conn.lgr->is_smcd)
0684         smcd_conn_save_peer_info(smc, clc);
0685     else
0686         smcr_conn_save_peer_info(smc, clc);
0687     smc_conn_save_peer_info_fce(smc, clc);
0688 }
0689 
0690 static void smc_link_save_peer_info(struct smc_link *link,
0691                     struct smc_clc_msg_accept_confirm *clc,
0692                     struct smc_init_info *ini)
0693 {
0694     link->peer_qpn = ntoh24(clc->r0.qpn);
0695     memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
0696     memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
0697     link->peer_psn = ntoh24(clc->r0.psn);
0698     link->peer_mtu = clc->r0.qp_mtu;
0699 }
0700 
0701 static void smc_stat_inc_fback_rsn_cnt(struct smc_sock *smc,
0702                        struct smc_stats_fback *fback_arr)
0703 {
0704     int cnt;
0705 
0706     for (cnt = 0; cnt < SMC_MAX_FBACK_RSN_CNT; cnt++) {
0707         if (fback_arr[cnt].fback_code == smc->fallback_rsn) {
0708             fback_arr[cnt].count++;
0709             break;
0710         }
0711         if (!fback_arr[cnt].fback_code) {
0712             fback_arr[cnt].fback_code = smc->fallback_rsn;
0713             fback_arr[cnt].count++;
0714             break;
0715         }
0716     }
0717 }
0718 
0719 static void smc_stat_fallback(struct smc_sock *smc)
0720 {
0721     struct net *net = sock_net(&smc->sk);
0722 
0723     mutex_lock(&net->smc.mutex_fback_rsn);
0724     if (smc->listen_smc) {
0725         smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->srv);
0726         net->smc.fback_rsn->srv_fback_cnt++;
0727     } else {
0728         smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->clnt);
0729         net->smc.fback_rsn->clnt_fback_cnt++;
0730     }
0731     mutex_unlock(&net->smc.mutex_fback_rsn);
0732 }
0733 
0734 /* must be called under rcu read lock */
0735 static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key)
0736 {
0737     struct socket_wq *wq;
0738     __poll_t flags;
0739 
0740     wq = rcu_dereference(smc->sk.sk_wq);
0741     if (!skwq_has_sleeper(wq))
0742         return;
0743 
0744     /* wake up smc sk->sk_wq */
0745     if (!key) {
0746         /* sk_state_change */
0747         wake_up_interruptible_all(&wq->wait);
0748     } else {
0749         flags = key_to_poll(key);
0750         if (flags & (EPOLLIN | EPOLLOUT))
0751             /* sk_data_ready or sk_write_space */
0752             wake_up_interruptible_sync_poll(&wq->wait, flags);
0753         else if (flags & EPOLLERR)
0754             /* sk_error_report */
0755             wake_up_interruptible_poll(&wq->wait, flags);
0756     }
0757 }
0758 
0759 static int smc_fback_mark_woken(wait_queue_entry_t *wait,
0760                 unsigned int mode, int sync, void *key)
0761 {
0762     struct smc_mark_woken *mark =
0763         container_of(wait, struct smc_mark_woken, wait_entry);
0764 
0765     mark->woken = true;
0766     mark->key = key;
0767     return 0;
0768 }
0769 
0770 static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,
0771                      void (*clcsock_callback)(struct sock *sk))
0772 {
0773     struct smc_mark_woken mark = { .woken = false };
0774     struct socket_wq *wq;
0775 
0776     init_waitqueue_func_entry(&mark.wait_entry,
0777                   smc_fback_mark_woken);
0778     rcu_read_lock();
0779     wq = rcu_dereference(clcsk->sk_wq);
0780     if (!wq)
0781         goto out;
0782     add_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
0783     clcsock_callback(clcsk);
0784     remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
0785 
0786     if (mark.woken)
0787         smc_fback_wakeup_waitqueue(smc, mark.key);
0788 out:
0789     rcu_read_unlock();
0790 }
0791 
0792 static void smc_fback_state_change(struct sock *clcsk)
0793 {
0794     struct smc_sock *smc;
0795 
0796     read_lock_bh(&clcsk->sk_callback_lock);
0797     smc = smc_clcsock_user_data(clcsk);
0798     if (smc)
0799         smc_fback_forward_wakeup(smc, clcsk,
0800                      smc->clcsk_state_change);
0801     read_unlock_bh(&clcsk->sk_callback_lock);
0802 }
0803 
0804 static void smc_fback_data_ready(struct sock *clcsk)
0805 {
0806     struct smc_sock *smc;
0807 
0808     read_lock_bh(&clcsk->sk_callback_lock);
0809     smc = smc_clcsock_user_data(clcsk);
0810     if (smc)
0811         smc_fback_forward_wakeup(smc, clcsk,
0812                      smc->clcsk_data_ready);
0813     read_unlock_bh(&clcsk->sk_callback_lock);
0814 }
0815 
0816 static void smc_fback_write_space(struct sock *clcsk)
0817 {
0818     struct smc_sock *smc;
0819 
0820     read_lock_bh(&clcsk->sk_callback_lock);
0821     smc = smc_clcsock_user_data(clcsk);
0822     if (smc)
0823         smc_fback_forward_wakeup(smc, clcsk,
0824                      smc->clcsk_write_space);
0825     read_unlock_bh(&clcsk->sk_callback_lock);
0826 }
0827 
0828 static void smc_fback_error_report(struct sock *clcsk)
0829 {
0830     struct smc_sock *smc;
0831 
0832     read_lock_bh(&clcsk->sk_callback_lock);
0833     smc = smc_clcsock_user_data(clcsk);
0834     if (smc)
0835         smc_fback_forward_wakeup(smc, clcsk,
0836                      smc->clcsk_error_report);
0837     read_unlock_bh(&clcsk->sk_callback_lock);
0838 }
0839 
0840 static void smc_fback_replace_callbacks(struct smc_sock *smc)
0841 {
0842     struct sock *clcsk = smc->clcsock->sk;
0843 
0844     write_lock_bh(&clcsk->sk_callback_lock);
0845     clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
0846 
0847     smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change,
0848                    &smc->clcsk_state_change);
0849     smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
0850                    &smc->clcsk_data_ready);
0851     smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space,
0852                    &smc->clcsk_write_space);
0853     smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report,
0854                    &smc->clcsk_error_report);
0855 
0856     write_unlock_bh(&clcsk->sk_callback_lock);
0857 }
0858 
0859 static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
0860 {
0861     int rc = 0;
0862 
0863     mutex_lock(&smc->clcsock_release_lock);
0864     if (!smc->clcsock) {
0865         rc = -EBADF;
0866         goto out;
0867     }
0868 
0869     smc->use_fallback = true;
0870     smc->fallback_rsn = reason_code;
0871     smc_stat_fallback(smc);
0872     trace_smc_switch_to_fallback(smc, reason_code);
0873     if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
0874         smc->clcsock->file = smc->sk.sk_socket->file;
0875         smc->clcsock->file->private_data = smc->clcsock;
0876         smc->clcsock->wq.fasync_list =
0877             smc->sk.sk_socket->wq.fasync_list;
0878 
0879         /* There might be some wait entries remaining
0880          * in smc sk->sk_wq and they should be woken up
0881          * as clcsock's wait queue is woken up.
0882          */
0883         smc_fback_replace_callbacks(smc);
0884     }
0885 out:
0886     mutex_unlock(&smc->clcsock_release_lock);
0887     return rc;
0888 }
0889 
0890 /* fall back during connect */
0891 static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
0892 {
0893     struct net *net = sock_net(&smc->sk);
0894     int rc = 0;
0895 
0896     rc = smc_switch_to_fallback(smc, reason_code);
0897     if (rc) { /* fallback fails */
0898         this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
0899         if (smc->sk.sk_state == SMC_INIT)
0900             sock_put(&smc->sk); /* passive closing */
0901         return rc;
0902     }
0903     smc_copy_sock_settings_to_clc(smc);
0904     smc->connect_nonblock = 0;
0905     if (smc->sk.sk_state == SMC_INIT)
0906         smc->sk.sk_state = SMC_ACTIVE;
0907     return 0;
0908 }
0909 
0910 /* decline and fall back during connect */
0911 static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
0912                     u8 version)
0913 {
0914     struct net *net = sock_net(&smc->sk);
0915     int rc;
0916 
0917     if (reason_code < 0) { /* error, fallback is not possible */
0918         this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
0919         if (smc->sk.sk_state == SMC_INIT)
0920             sock_put(&smc->sk); /* passive closing */
0921         return reason_code;
0922     }
0923     if (reason_code != SMC_CLC_DECL_PEERDECL) {
0924         rc = smc_clc_send_decline(smc, reason_code, version);
0925         if (rc < 0) {
0926             this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
0927             if (smc->sk.sk_state == SMC_INIT)
0928                 sock_put(&smc->sk); /* passive closing */
0929             return rc;
0930         }
0931     }
0932     return smc_connect_fallback(smc, reason_code);
0933 }
0934 
0935 static void smc_conn_abort(struct smc_sock *smc, int local_first)
0936 {
0937     struct smc_connection *conn = &smc->conn;
0938     struct smc_link_group *lgr = conn->lgr;
0939     bool lgr_valid = false;
0940 
0941     if (smc_conn_lgr_valid(conn))
0942         lgr_valid = true;
0943 
0944     smc_conn_free(conn);
0945     if (local_first && lgr_valid)
0946         smc_lgr_cleanup_early(lgr);
0947 }
0948 
0949 /* check if there is a rdma device available for this connection. */
0950 /* called for connect and listen */
0951 static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
0952 {
0953     /* PNET table look up: search active ib_device and port
0954      * within same PNETID that also contains the ethernet device
0955      * used for the internal TCP socket
0956      */
0957     smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
0958     if (!ini->check_smcrv2 && !ini->ib_dev)
0959         return SMC_CLC_DECL_NOSMCRDEV;
0960     if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
0961         return SMC_CLC_DECL_NOSMCRDEV;
0962     return 0;
0963 }
0964 
0965 /* check if there is an ISM device available for this connection. */
0966 /* called for connect and listen */
0967 static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
0968 {
0969     /* Find ISM device with same PNETID as connecting interface  */
0970     smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
0971     if (!ini->ism_dev[0])
0972         return SMC_CLC_DECL_NOSMCDDEV;
0973     else
0974         ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
0975     return 0;
0976 }
0977 
0978 /* is chid unique for the ism devices that are already determined? */
0979 static bool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini,
0980                        int cnt)
0981 {
0982     int i = (!ini->ism_dev[0]) ? 1 : 0;
0983 
0984     for (; i < cnt; i++)
0985         if (ini->ism_chid[i] == chid)
0986             return false;
0987     return true;
0988 }
0989 
0990 /* determine possible V2 ISM devices (either without PNETID or with PNETID plus
0991  * PNETID matching net_device)
0992  */
0993 static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
0994                        struct smc_init_info *ini)
0995 {
0996     int rc = SMC_CLC_DECL_NOSMCDDEV;
0997     struct smcd_dev *smcd;
0998     int i = 1;
0999     u16 chid;
1000 
1001     if (smcd_indicated(ini->smc_type_v1))
1002         rc = 0;     /* already initialized for V1 */
1003     mutex_lock(&smcd_dev_list.mutex);
1004     list_for_each_entry(smcd, &smcd_dev_list.list, list) {
1005         if (smcd->going_away || smcd == ini->ism_dev[0])
1006             continue;
1007         chid = smc_ism_get_chid(smcd);
1008         if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
1009             continue;
1010         if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
1011             smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
1012             ini->ism_dev[i] = smcd;
1013             ini->ism_chid[i] = chid;
1014             ini->is_smcd = true;
1015             rc = 0;
1016             i++;
1017             if (i > SMC_MAX_ISM_DEVS)
1018                 break;
1019         }
1020     }
1021     mutex_unlock(&smcd_dev_list.mutex);
1022     ini->ism_offered_cnt = i - 1;
1023     if (!ini->ism_dev[0] && !ini->ism_dev[1])
1024         ini->smcd_version = 0;
1025 
1026     return rc;
1027 }
1028 
1029 /* Check for VLAN ID and register it on ISM device just for CLC handshake */
1030 static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
1031                       struct smc_init_info *ini)
1032 {
1033     if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
1034         return SMC_CLC_DECL_ISMVLANERR;
1035     return 0;
1036 }
1037 
1038 static int smc_find_proposal_devices(struct smc_sock *smc,
1039                      struct smc_init_info *ini)
1040 {
1041     int rc = 0;
1042 
1043     /* check if there is an ism device available */
1044     if (!(ini->smcd_version & SMC_V1) ||
1045         smc_find_ism_device(smc, ini) ||
1046         smc_connect_ism_vlan_setup(smc, ini))
1047         ini->smcd_version &= ~SMC_V1;
1048     /* else ISM V1 is supported for this connection */
1049 
1050     /* check if there is an rdma device available */
1051     if (!(ini->smcr_version & SMC_V1) ||
1052         smc_find_rdma_device(smc, ini))
1053         ini->smcr_version &= ~SMC_V1;
1054     /* else RDMA is supported for this connection */
1055 
1056     ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
1057                           ini->smcr_version & SMC_V1);
1058 
1059     /* check if there is an ism v2 device available */
1060     if (!(ini->smcd_version & SMC_V2) ||
1061         !smc_ism_is_v2_capable() ||
1062         smc_find_ism_v2_device_clnt(smc, ini))
1063         ini->smcd_version &= ~SMC_V2;
1064 
1065     /* check if there is an rdma v2 device available */
1066     ini->check_smcrv2 = true;
1067     ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
1068     if (!(ini->smcr_version & SMC_V2) ||
1069         smc->clcsock->sk->sk_family != AF_INET ||
1070         !smc_clc_ueid_count() ||
1071         smc_find_rdma_device(smc, ini))
1072         ini->smcr_version &= ~SMC_V2;
1073     ini->check_smcrv2 = false;
1074 
1075     ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
1076                           ini->smcr_version & SMC_V2);
1077 
1078     /* if neither ISM nor RDMA are supported, fallback */
1079     if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
1080         rc = SMC_CLC_DECL_NOSMCDEV;
1081 
1082     return rc;
1083 }
1084 
1085 /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
1086  * used, the VLAN ID will be registered again during the connection setup.
1087  */
1088 static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
1089                     struct smc_init_info *ini)
1090 {
1091     if (!smcd_indicated(ini->smc_type_v1))
1092         return 0;
1093     if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
1094         return SMC_CLC_DECL_CNFERR;
1095     return 0;
1096 }
1097 
1098 #define SMC_CLC_MAX_ACCEPT_LEN \
1099     (sizeof(struct smc_clc_msg_accept_confirm_v2) + \
1100      sizeof(struct smc_clc_first_contact_ext) + \
1101      sizeof(struct smc_clc_msg_trail))
1102 
1103 /* CLC handshake during connect */
1104 static int smc_connect_clc(struct smc_sock *smc,
1105                struct smc_clc_msg_accept_confirm_v2 *aclc2,
1106                struct smc_init_info *ini)
1107 {
1108     int rc = 0;
1109 
1110     /* do inband token exchange */
1111     rc = smc_clc_send_proposal(smc, ini);
1112     if (rc)
1113         return rc;
1114     /* receive SMC Accept CLC message */
1115     return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
1116                 SMC_CLC_ACCEPT, CLC_WAIT_TIME);
1117 }
1118 
1119 void smc_fill_gid_list(struct smc_link_group *lgr,
1120                struct smc_gidlist *gidlist,
1121                struct smc_ib_device *known_dev, u8 *known_gid)
1122 {
1123     struct smc_init_info *alt_ini = NULL;
1124 
1125     memset(gidlist, 0, sizeof(*gidlist));
1126     memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE);
1127 
1128     alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL);
1129     if (!alt_ini)
1130         goto out;
1131 
1132     alt_ini->vlan_id = lgr->vlan_id;
1133     alt_ini->check_smcrv2 = true;
1134     alt_ini->smcrv2.saddr = lgr->saddr;
1135     smc_pnet_find_alt_roce(lgr, alt_ini, known_dev);
1136 
1137     if (!alt_ini->smcrv2.ib_dev_v2)
1138         goto out;
1139 
1140     memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2,
1141            SMC_GID_SIZE);
1142 
1143 out:
1144     kfree(alt_ini);
1145 }
1146 
1147 static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
1148                        struct smc_clc_msg_accept_confirm *aclc,
1149                        struct smc_init_info *ini)
1150 {
1151     struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
1152         (struct smc_clc_msg_accept_confirm_v2 *)aclc;
1153     struct smc_clc_first_contact_ext *fce =
1154         (struct smc_clc_first_contact_ext *)
1155             (((u8 *)clc_v2) + sizeof(*clc_v2));
1156 
1157     if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
1158         return 0;
1159 
1160     if (fce->v2_direct) {
1161         memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
1162         ini->smcrv2.uses_gateway = false;
1163     } else {
1164         if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
1165                       smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
1166                       ini->smcrv2.nexthop_mac,
1167                       &ini->smcrv2.uses_gateway))
1168             return SMC_CLC_DECL_NOROUTE;
1169         if (!ini->smcrv2.uses_gateway) {
1170             /* mismatch: peer claims indirect, but its direct */
1171             return SMC_CLC_DECL_NOINDIRECT;
1172         }
1173     }
1174     return 0;
1175 }
1176 
1177 /* setup for RDMA connection of client */
1178 static int smc_connect_rdma(struct smc_sock *smc,
1179                 struct smc_clc_msg_accept_confirm *aclc,
1180                 struct smc_init_info *ini)
1181 {
1182     int i, reason_code = 0;
1183     struct smc_link *link;
1184     u8 *eid = NULL;
1185 
1186     ini->is_smcd = false;
1187     ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
1188     ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
1189     memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
1190     memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
1191     memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
1192 
1193     reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
1194     if (reason_code)
1195         return reason_code;
1196 
1197     mutex_lock(&smc_client_lgr_pending);
1198     reason_code = smc_conn_create(smc, ini);
1199     if (reason_code) {
1200         mutex_unlock(&smc_client_lgr_pending);
1201         return reason_code;
1202     }
1203 
1204     smc_conn_save_peer_info(smc, aclc);
1205 
1206     if (ini->first_contact_local) {
1207         link = smc->conn.lnk;
1208     } else {
1209         /* set link that was assigned by server */
1210         link = NULL;
1211         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1212             struct smc_link *l = &smc->conn.lgr->lnk[i];
1213 
1214             if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
1215                 !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
1216                     SMC_GID_SIZE) &&
1217                 (aclc->hdr.version > SMC_V1 ||
1218                  !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
1219                      sizeof(l->peer_mac)))) {
1220                 link = l;
1221                 break;
1222             }
1223         }
1224         if (!link) {
1225             reason_code = SMC_CLC_DECL_NOSRVLINK;
1226             goto connect_abort;
1227         }
1228         smc_switch_link_and_count(&smc->conn, link);
1229     }
1230 
1231     /* create send buffer and rmb */
1232     if (smc_buf_create(smc, false)) {
1233         reason_code = SMC_CLC_DECL_MEM;
1234         goto connect_abort;
1235     }
1236 
1237     if (ini->first_contact_local)
1238         smc_link_save_peer_info(link, aclc, ini);
1239 
1240     if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
1241         reason_code = SMC_CLC_DECL_ERR_RTOK;
1242         goto connect_abort;
1243     }
1244 
1245     smc_close_init(smc);
1246     smc_rx_init(smc);
1247 
1248     if (ini->first_contact_local) {
1249         if (smc_ib_ready_link(link)) {
1250             reason_code = SMC_CLC_DECL_ERR_RDYLNK;
1251             goto connect_abort;
1252         }
1253     } else {
1254         /* reg sendbufs if they were vzalloced */
1255         if (smc->conn.sndbuf_desc->is_vm) {
1256             if (smcr_lgr_reg_sndbufs(link, smc->conn.sndbuf_desc)) {
1257                 reason_code = SMC_CLC_DECL_ERR_REGBUF;
1258                 goto connect_abort;
1259             }
1260         }
1261         if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
1262             reason_code = SMC_CLC_DECL_ERR_REGBUF;
1263             goto connect_abort;
1264         }
1265     }
1266 
1267     if (aclc->hdr.version > SMC_V1) {
1268         struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
1269             (struct smc_clc_msg_accept_confirm_v2 *)aclc;
1270 
1271         eid = clc_v2->r1.eid;
1272         if (ini->first_contact_local)
1273             smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
1274                       link->smcibdev, link->gid);
1275     }
1276 
1277     reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
1278                        aclc->hdr.version, eid, ini);
1279     if (reason_code)
1280         goto connect_abort;
1281 
1282     smc_tx_init(smc);
1283 
1284     if (ini->first_contact_local) {
1285         /* QP confirmation over RoCE fabric */
1286         smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
1287         reason_code = smcr_clnt_conf_first_link(smc);
1288         smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
1289         if (reason_code)
1290             goto connect_abort;
1291     }
1292     mutex_unlock(&smc_client_lgr_pending);
1293 
1294     smc_copy_sock_settings_to_clc(smc);
1295     smc->connect_nonblock = 0;
1296     if (smc->sk.sk_state == SMC_INIT)
1297         smc->sk.sk_state = SMC_ACTIVE;
1298 
1299     return 0;
1300 connect_abort:
1301     smc_conn_abort(smc, ini->first_contact_local);
1302     mutex_unlock(&smc_client_lgr_pending);
1303     smc->connect_nonblock = 0;
1304 
1305     return reason_code;
1306 }
1307 
1308 /* The server has chosen one of the proposed ISM devices for the communication.
1309  * Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
1310  */
1311 static int
1312 smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
1313                    struct smc_init_info *ini)
1314 {
1315     int i;
1316 
1317     for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
1318         if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
1319             ini->ism_selected = i;
1320             return 0;
1321         }
1322     }
1323 
1324     return -EPROTO;
1325 }
1326 
1327 /* setup for ISM connection of client */
1328 static int smc_connect_ism(struct smc_sock *smc,
1329                struct smc_clc_msg_accept_confirm *aclc,
1330                struct smc_init_info *ini)
1331 {
1332     u8 *eid = NULL;
1333     int rc = 0;
1334 
1335     ini->is_smcd = true;
1336     ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
1337 
1338     if (aclc->hdr.version == SMC_V2) {
1339         struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
1340             (struct smc_clc_msg_accept_confirm_v2 *)aclc;
1341 
1342         rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
1343         if (rc)
1344             return rc;
1345     }
1346     ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid;
1347 
1348     /* there is only one lgr role for SMC-D; use server lock */
1349     mutex_lock(&smc_server_lgr_pending);
1350     rc = smc_conn_create(smc, ini);
1351     if (rc) {
1352         mutex_unlock(&smc_server_lgr_pending);
1353         return rc;
1354     }
1355 
1356     /* Create send and receive buffers */
1357     rc = smc_buf_create(smc, true);
1358     if (rc) {
1359         rc = (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM;
1360         goto connect_abort;
1361     }
1362 
1363     smc_conn_save_peer_info(smc, aclc);
1364     smc_close_init(smc);
1365     smc_rx_init(smc);
1366     smc_tx_init(smc);
1367 
1368     if (aclc->hdr.version > SMC_V1) {
1369         struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
1370             (struct smc_clc_msg_accept_confirm_v2 *)aclc;
1371 
1372         eid = clc_v2->d1.eid;
1373     }
1374 
1375     rc = smc_clc_send_confirm(smc, ini->first_contact_local,
1376                   aclc->hdr.version, eid, NULL);
1377     if (rc)
1378         goto connect_abort;
1379     mutex_unlock(&smc_server_lgr_pending);
1380 
1381     smc_copy_sock_settings_to_clc(smc);
1382     smc->connect_nonblock = 0;
1383     if (smc->sk.sk_state == SMC_INIT)
1384         smc->sk.sk_state = SMC_ACTIVE;
1385 
1386     return 0;
1387 connect_abort:
1388     smc_conn_abort(smc, ini->first_contact_local);
1389     mutex_unlock(&smc_server_lgr_pending);
1390     smc->connect_nonblock = 0;
1391 
1392     return rc;
1393 }
1394 
1395 /* check if received accept type and version matches a proposed one */
1396 static int smc_connect_check_aclc(struct smc_init_info *ini,
1397                   struct smc_clc_msg_accept_confirm *aclc)
1398 {
1399     if (aclc->hdr.typev1 != SMC_TYPE_R &&
1400         aclc->hdr.typev1 != SMC_TYPE_D)
1401         return SMC_CLC_DECL_MODEUNSUPP;
1402 
1403     if (aclc->hdr.version >= SMC_V2) {
1404         if ((aclc->hdr.typev1 == SMC_TYPE_R &&
1405              !smcr_indicated(ini->smc_type_v2)) ||
1406             (aclc->hdr.typev1 == SMC_TYPE_D &&
1407              !smcd_indicated(ini->smc_type_v2)))
1408             return SMC_CLC_DECL_MODEUNSUPP;
1409     } else {
1410         if ((aclc->hdr.typev1 == SMC_TYPE_R &&
1411              !smcr_indicated(ini->smc_type_v1)) ||
1412             (aclc->hdr.typev1 == SMC_TYPE_D &&
1413              !smcd_indicated(ini->smc_type_v1)))
1414             return SMC_CLC_DECL_MODEUNSUPP;
1415     }
1416 
1417     return 0;
1418 }
1419 
1420 /* perform steps before actually connecting */
1421 static int __smc_connect(struct smc_sock *smc)
1422 {
1423     u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
1424     struct smc_clc_msg_accept_confirm_v2 *aclc2;
1425     struct smc_clc_msg_accept_confirm *aclc;
1426     struct smc_init_info *ini = NULL;
1427     u8 *buf = NULL;
1428     int rc = 0;
1429 
1430     if (smc->use_fallback)
1431         return smc_connect_fallback(smc, smc->fallback_rsn);
1432 
1433     /* if peer has not signalled SMC-capability, fall back */
1434     if (!tcp_sk(smc->clcsock->sk)->syn_smc)
1435         return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
1436 
1437     /* IPSec connections opt out of SMC optimizations */
1438     if (using_ipsec(smc))
1439         return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC,
1440                             version);
1441 
1442     ini = kzalloc(sizeof(*ini), GFP_KERNEL);
1443     if (!ini)
1444         return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
1445                             version);
1446 
1447     ini->smcd_version = SMC_V1 | SMC_V2;
1448     ini->smcr_version = SMC_V1 | SMC_V2;
1449     ini->smc_type_v1 = SMC_TYPE_B;
1450     ini->smc_type_v2 = SMC_TYPE_B;
1451 
1452     /* get vlan id from IP device */
1453     if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
1454         ini->smcd_version &= ~SMC_V1;
1455         ini->smcr_version = 0;
1456         ini->smc_type_v1 = SMC_TYPE_N;
1457         if (!ini->smcd_version) {
1458             rc = SMC_CLC_DECL_GETVLANERR;
1459             goto fallback;
1460         }
1461     }
1462 
1463     rc = smc_find_proposal_devices(smc, ini);
1464     if (rc)
1465         goto fallback;
1466 
1467     buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL);
1468     if (!buf) {
1469         rc = SMC_CLC_DECL_MEM;
1470         goto fallback;
1471     }
1472     aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
1473     aclc = (struct smc_clc_msg_accept_confirm *)aclc2;
1474 
1475     /* perform CLC handshake */
1476     rc = smc_connect_clc(smc, aclc2, ini);
1477     if (rc) {
1478         /* -EAGAIN on timeout, see tcp_recvmsg() */
1479         if (rc == -EAGAIN) {
1480             rc = -ETIMEDOUT;
1481             smc->sk.sk_err = ETIMEDOUT;
1482         }
1483         goto vlan_cleanup;
1484     }
1485 
1486     /* check if smc modes and versions of CLC proposal and accept match */
1487     rc = smc_connect_check_aclc(ini, aclc);
1488     version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
1489     if (rc)
1490         goto vlan_cleanup;
1491 
1492     /* depending on previous steps, connect using rdma or ism */
1493     if (aclc->hdr.typev1 == SMC_TYPE_R) {
1494         ini->smcr_version = version;
1495         rc = smc_connect_rdma(smc, aclc, ini);
1496     } else if (aclc->hdr.typev1 == SMC_TYPE_D) {
1497         ini->smcd_version = version;
1498         rc = smc_connect_ism(smc, aclc, ini);
1499     }
1500     if (rc)
1501         goto vlan_cleanup;
1502 
1503     SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc);
1504     smc_connect_ism_vlan_cleanup(smc, ini);
1505     kfree(buf);
1506     kfree(ini);
1507     return 0;
1508 
1509 vlan_cleanup:
1510     smc_connect_ism_vlan_cleanup(smc, ini);
1511     kfree(buf);
1512 fallback:
1513     kfree(ini);
1514     return smc_connect_decline_fallback(smc, rc, version);
1515 }
1516 
/* Worker for a non-blocking connect.
 *
 * Waits, limited by the send timeout of the smc sock, until the CLC socket
 * has left the SYN states. Errors are propagated to the smc sock, which is
 * then closed; otherwise __smc_connect() does the SMC handshake. Finally
 * waiters are woken up, unless the sock is already dead.
 */
static void smc_connect_work(struct work_struct *work)
{
    struct smc_sock *smc = container_of(work, struct smc_sock,
                                        connect_work);
    struct sock *sk = &smc->sk;
    long timeo = sk->sk_sndtimeo;
    struct sock *clcsk;
    int rc = 0;

    if (!timeo)
        timeo = MAX_SCHEDULE_TIMEOUT;

    clcsk = smc->clcsock->sk;
    lock_sock(clcsk);
    if (clcsk->sk_err) {
        sk->sk_err = clcsk->sk_err;
    } else if ((1 << clcsk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
        rc = sk_stream_wait_connect(clcsk, &timeo);
        /* -EPIPE is not an error if the connection got established */
        if (rc == -EPIPE &&
            ((1 << clcsk->sk_state) &
             (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
            rc = 0;
    }
    release_sock(clcsk);

    lock_sock(sk);
    if (rc != 0 || sk->sk_err) {
        sk->sk_state = SMC_CLOSED;
        switch (rc) {
        case -EPIPE:
        case -EAGAIN:
            sk->sk_err = EPIPE;
            break;
        case -ECONNREFUSED:
            sk->sk_err = ECONNREFUSED;
            break;
        default:
            if (signal_pending(current))
                sk->sk_err = -sock_intr_errno(timeo);
            break;
        }
        sock_put(sk); /* passive closing */
        goto out;
    }

    rc = __smc_connect(smc);
    if (rc < 0)
        sk->sk_err = -rc;

out:
    if (!sock_flag(sk, SOCK_DEAD)) {
        if (sk->sk_err) {
            sk->sk_state_change(sk);
        } else {
            /* allow polling before and after fallback decision */
            smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
            sk->sk_write_space(sk);
        }
    }
    release_sock(sk);
}
1566 
1567 static int smc_connect(struct socket *sock, struct sockaddr *addr,
1568                int alen, int flags)
1569 {
1570     struct sock *sk = sock->sk;
1571     struct smc_sock *smc;
1572     int rc = -EINVAL;
1573 
1574     smc = smc_sk(sk);
1575 
1576     /* separate smc parameter checking to be safe */
1577     if (alen < sizeof(addr->sa_family))
1578         goto out_err;
1579     if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
1580         goto out_err;
1581 
1582     lock_sock(sk);
1583     switch (sock->state) {
1584     default:
1585         rc = -EINVAL;
1586         goto out;
1587     case SS_CONNECTED:
1588         rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
1589         goto out;
1590     case SS_CONNECTING:
1591         if (sk->sk_state == SMC_ACTIVE)
1592             goto connected;
1593         break;
1594     case SS_UNCONNECTED:
1595         sock->state = SS_CONNECTING;
1596         break;
1597     }
1598 
1599     switch (sk->sk_state) {
1600     default:
1601         goto out;
1602     case SMC_CLOSED:
1603         rc = sock_error(sk) ? : -ECONNABORTED;
1604         sock->state = SS_UNCONNECTED;
1605         goto out;
1606     case SMC_ACTIVE:
1607         rc = -EISCONN;
1608         goto out;
1609     case SMC_INIT:
1610         break;
1611     }
1612 
1613     smc_copy_sock_settings_to_clc(smc);
1614     tcp_sk(smc->clcsock->sk)->syn_smc = 1;
1615     if (smc->connect_nonblock) {
1616         rc = -EALREADY;
1617         goto out;
1618     }
1619     rc = kernel_connect(smc->clcsock, addr, alen, flags);
1620     if (rc && rc != -EINPROGRESS)
1621         goto out;
1622 
1623     if (smc->use_fallback) {
1624         sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
1625         goto out;
1626     }
1627     sock_hold(&smc->sk); /* sock put in passive closing */
1628     if (flags & O_NONBLOCK) {
1629         if (queue_work(smc_hs_wq, &smc->connect_work))
1630             smc->connect_nonblock = 1;
1631         rc = -EINPROGRESS;
1632         goto out;
1633     } else {
1634         rc = __smc_connect(smc);
1635         if (rc < 0)
1636             goto out;
1637     }
1638 
1639 connected:
1640     rc = 0;
1641     sock->state = SS_CONNECTED;
1642 out:
1643     release_sock(sk);
1644 out_err:
1645     return rc;
1646 }
1647 
1648 static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
1649 {
1650     struct socket *new_clcsock = NULL;
1651     struct sock *lsk = &lsmc->sk;
1652     struct sock *new_sk;
1653     int rc = -EINVAL;
1654 
1655     release_sock(lsk);
1656     new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
1657     if (!new_sk) {
1658         rc = -ENOMEM;
1659         lsk->sk_err = ENOMEM;
1660         *new_smc = NULL;
1661         lock_sock(lsk);
1662         goto out;
1663     }
1664     *new_smc = smc_sk(new_sk);
1665 
1666     mutex_lock(&lsmc->clcsock_release_lock);
1667     if (lsmc->clcsock)
1668         rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
1669     mutex_unlock(&lsmc->clcsock_release_lock);
1670     lock_sock(lsk);
1671     if  (rc < 0 && rc != -EAGAIN)
1672         lsk->sk_err = -rc;
1673     if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
1674         new_sk->sk_prot->unhash(new_sk);
1675         if (new_clcsock)
1676             sock_release(new_clcsock);
1677         new_sk->sk_state = SMC_CLOSED;
1678         sock_set_flag(new_sk, SOCK_DEAD);
1679         sock_put(new_sk); /* final */
1680         *new_smc = NULL;
1681         goto out;
1682     }
1683 
1684     /* new clcsock has inherited the smc listen-specific sk_data_ready
1685      * function; switch it back to the original sk_data_ready function
1686      */
1687     new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
1688 
1689     /* if new clcsock has also inherited the fallback-specific callback
1690      * functions, switch them back to the original ones.
1691      */
1692     if (lsmc->use_fallback) {
1693         if (lsmc->clcsk_state_change)
1694             new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change;
1695         if (lsmc->clcsk_write_space)
1696             new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space;
1697         if (lsmc->clcsk_error_report)
1698             new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
1699     }
1700 
1701     (*new_smc)->clcsock = new_clcsock;
1702 out:
1703     return rc;
1704 }
1705 
1706 /* add a just created sock to the accept queue of the listen sock as
1707  * candidate for a following socket accept call from user space
1708  */
1709 static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
1710 {
1711     struct smc_sock *par = smc_sk(parent);
1712 
1713     sock_hold(sk); /* sock_put in smc_accept_unlink () */
1714     spin_lock(&par->accept_q_lock);
1715     list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
1716     spin_unlock(&par->accept_q_lock);
1717     sk_acceptq_added(parent);
1718 }
1719 
1720 /* remove a socket from the accept queue of its parental listening socket */
1721 static void smc_accept_unlink(struct sock *sk)
1722 {
1723     struct smc_sock *par = smc_sk(sk)->listen_smc;
1724 
1725     spin_lock(&par->accept_q_lock);
1726     list_del_init(&smc_sk(sk)->accept_q);
1727     spin_unlock(&par->accept_q_lock);
1728     sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
1729     sock_put(sk); /* sock_hold in smc_accept_enqueue */
1730 }
1731 
1732 /* remove a sock from the accept queue to bind it to a new socket created
1733  * for a socket accept call from user space
1734  */
1735 struct sock *smc_accept_dequeue(struct sock *parent,
1736                 struct socket *new_sock)
1737 {
1738     struct smc_sock *isk, *n;
1739     struct sock *new_sk;
1740 
1741     list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
1742         new_sk = (struct sock *)isk;
1743 
1744         smc_accept_unlink(new_sk);
1745         if (new_sk->sk_state == SMC_CLOSED) {
1746             new_sk->sk_prot->unhash(new_sk);
1747             if (isk->clcsock) {
1748                 sock_release(isk->clcsock);
1749                 isk->clcsock = NULL;
1750             }
1751             sock_put(new_sk); /* final */
1752             continue;
1753         }
1754         if (new_sock) {
1755             sock_graft(new_sk, new_sock);
1756             new_sock->state = SS_CONNECTED;
1757             if (isk->use_fallback) {
1758                 smc_sk(new_sk)->clcsock->file = new_sock->file;
1759                 isk->clcsock->file->private_data = isk->clcsock;
1760             }
1761         }
1762         return new_sk;
1763     }
1764     return NULL;
1765 }
1766 
1767 /* clean up for a created but never accepted sock */
1768 void smc_close_non_accepted(struct sock *sk)
1769 {
1770     struct smc_sock *smc = smc_sk(sk);
1771 
1772     sock_hold(sk); /* sock_put below */
1773     lock_sock(sk);
1774     if (!sk->sk_lingertime)
1775         /* wait for peer closing */
1776         sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
1777     __smc_release(smc);
1778     release_sock(sk);
1779     sock_put(sk); /* sock_hold above */
1780     sock_put(sk); /* final sock_put */
1781 }
1782 
1783 static int smcr_serv_conf_first_link(struct smc_sock *smc)
1784 {
1785     struct smc_link *link = smc->conn.lnk;
1786     struct smc_llc_qentry *qentry;
1787     int rc;
1788 
1789     /* reg the sndbuf if it was vzalloced*/
1790     if (smc->conn.sndbuf_desc->is_vm) {
1791         if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
1792             return SMC_CLC_DECL_ERR_REGBUF;
1793     }
1794 
1795     /* reg the rmb */
1796     if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
1797         return SMC_CLC_DECL_ERR_REGBUF;
1798 
1799     /* send CONFIRM LINK request to client over the RoCE fabric */
1800     rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
1801     if (rc < 0)
1802         return SMC_CLC_DECL_TIMEOUT_CL;
1803 
1804     /* receive CONFIRM LINK response from client over the RoCE fabric */
1805     qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
1806                   SMC_LLC_CONFIRM_LINK);
1807     if (!qentry) {
1808         struct smc_clc_msg_decline dclc;
1809 
1810         rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
1811                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
1812         return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
1813     }
1814     smc_llc_save_peer_uid(qentry);
1815     rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
1816     smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
1817     if (rc)
1818         return SMC_CLC_DECL_RMBE_EC;
1819 
1820     /* confirm_rkey is implicit on 1st contact */
1821     smc->conn.rmb_desc->is_conf_rkey = true;
1822 
1823     smc_llc_link_active(link);
1824     smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
1825 
1826     /* initial contact - try to establish second link */
1827     smc_llc_srv_add_link(link, NULL);
1828     return 0;
1829 }
1830 
1831 /* listen worker: finish */
1832 static void smc_listen_out(struct smc_sock *new_smc)
1833 {
1834     struct smc_sock *lsmc = new_smc->listen_smc;
1835     struct sock *newsmcsk = &new_smc->sk;
1836 
1837     if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
1838         atomic_dec(&lsmc->queued_smc_hs);
1839 
1840     if (lsmc->sk.sk_state == SMC_LISTEN) {
1841         lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
1842         smc_accept_enqueue(&lsmc->sk, newsmcsk);
1843         release_sock(&lsmc->sk);
1844     } else { /* no longer listening */
1845         smc_close_non_accepted(newsmcsk);
1846     }
1847 
1848     /* Wake up accept */
1849     lsmc->sk.sk_data_ready(&lsmc->sk);
1850     sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
1851 }
1852 
1853 /* listen worker: finish in state connected */
1854 static void smc_listen_out_connected(struct smc_sock *new_smc)
1855 {
1856     struct sock *newsmcsk = &new_smc->sk;
1857 
1858     if (newsmcsk->sk_state == SMC_INIT)
1859         newsmcsk->sk_state = SMC_ACTIVE;
1860 
1861     smc_listen_out(new_smc);
1862 }
1863 
1864 /* listen worker: finish in error state */
1865 static void smc_listen_out_err(struct smc_sock *new_smc)
1866 {
1867     struct sock *newsmcsk = &new_smc->sk;
1868     struct net *net = sock_net(newsmcsk);
1869 
1870     this_cpu_inc(net->smc.smc_stats->srv_hshake_err_cnt);
1871     if (newsmcsk->sk_state == SMC_INIT)
1872         sock_put(&new_smc->sk); /* passive closing */
1873     newsmcsk->sk_state = SMC_CLOSED;
1874 
1875     smc_listen_out(new_smc);
1876 }
1877 
1878 /* listen worker: decline and fall back if possible */
1879 static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
1880                    int local_first, u8 version)
1881 {
1882     /* RDMA setup failed, switch back to TCP */
1883     smc_conn_abort(new_smc, local_first);
1884     if (reason_code < 0 ||
1885         smc_switch_to_fallback(new_smc, reason_code)) {
1886         /* error, no fallback possible */
1887         smc_listen_out_err(new_smc);
1888         return;
1889     }
1890     if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
1891         if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
1892             smc_listen_out_err(new_smc);
1893             return;
1894         }
1895     }
1896     smc_listen_out_connected(new_smc);
1897 }
1898 
1899 /* listen worker: version checking */
1900 static int smc_listen_v2_check(struct smc_sock *new_smc,
1901                    struct smc_clc_msg_proposal *pclc,
1902                    struct smc_init_info *ini)
1903 {
1904     struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext;
1905     struct smc_clc_v2_extension *pclc_v2_ext;
1906     int rc = SMC_CLC_DECL_PEERNOSMC;
1907 
1908     ini->smc_type_v1 = pclc->hdr.typev1;
1909     ini->smc_type_v2 = pclc->hdr.typev2;
1910     ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
1911     ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
1912     if (pclc->hdr.version > SMC_V1) {
1913         if (smcd_indicated(ini->smc_type_v2))
1914             ini->smcd_version |= SMC_V2;
1915         if (smcr_indicated(ini->smc_type_v2))
1916             ini->smcr_version |= SMC_V2;
1917     }
1918     if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) {
1919         rc = SMC_CLC_DECL_PEERNOSMC;
1920         goto out;
1921     }
1922     pclc_v2_ext = smc_get_clc_v2_ext(pclc);
1923     if (!pclc_v2_ext) {
1924         ini->smcd_version &= ~SMC_V2;
1925         ini->smcr_version &= ~SMC_V2;
1926         rc = SMC_CLC_DECL_NOV2EXT;
1927         goto out;
1928     }
1929     pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
1930     if (ini->smcd_version & SMC_V2) {
1931         if (!smc_ism_is_v2_capable()) {
1932             ini->smcd_version &= ~SMC_V2;
1933             rc = SMC_CLC_DECL_NOISM2SUPP;
1934         } else if (!pclc_smcd_v2_ext) {
1935             ini->smcd_version &= ~SMC_V2;
1936             rc = SMC_CLC_DECL_NOV2DEXT;
1937         } else if (!pclc_v2_ext->hdr.eid_cnt &&
1938                !pclc_v2_ext->hdr.flag.seid) {
1939             ini->smcd_version &= ~SMC_V2;
1940             rc = SMC_CLC_DECL_NOUEID;
1941         }
1942     }
1943     if (ini->smcr_version & SMC_V2) {
1944         if (!pclc_v2_ext->hdr.eid_cnt) {
1945             ini->smcr_version &= ~SMC_V2;
1946             rc = SMC_CLC_DECL_NOUEID;
1947         }
1948     }
1949 
1950 out:
1951     if (!ini->smcd_version && !ini->smcr_version)
1952         return rc;
1953 
1954     return 0;
1955 }
1956 
1957 /* listen worker: check prefixes */
1958 static int smc_listen_prfx_check(struct smc_sock *new_smc,
1959                  struct smc_clc_msg_proposal *pclc)
1960 {
1961     struct smc_clc_msg_proposal_prefix *pclc_prfx;
1962     struct socket *newclcsock = new_smc->clcsock;
1963 
1964     if (pclc->hdr.typev1 == SMC_TYPE_N)
1965         return 0;
1966     pclc_prfx = smc_clc_proposal_get_prefix(pclc);
1967     if (smc_clc_prfx_match(newclcsock, pclc_prfx))
1968         return SMC_CLC_DECL_DIFFPREFIX;
1969 
1970     return 0;
1971 }
1972 
1973 /* listen worker: initialize connection and buffers */
1974 static int smc_listen_rdma_init(struct smc_sock *new_smc,
1975                 struct smc_init_info *ini)
1976 {
1977     int rc;
1978 
1979     /* allocate connection / link group */
1980     rc = smc_conn_create(new_smc, ini);
1981     if (rc)
1982         return rc;
1983 
1984     /* create send buffer and rmb */
1985     if (smc_buf_create(new_smc, false))
1986         return SMC_CLC_DECL_MEM;
1987 
1988     return 0;
1989 }
1990 
1991 /* listen worker: initialize connection and buffers for SMC-D */
1992 static int smc_listen_ism_init(struct smc_sock *new_smc,
1993                    struct smc_init_info *ini)
1994 {
1995     int rc;
1996 
1997     rc = smc_conn_create(new_smc, ini);
1998     if (rc)
1999         return rc;
2000 
2001     /* Create send and receive buffers */
2002     rc = smc_buf_create(new_smc, true);
2003     if (rc) {
2004         smc_conn_abort(new_smc, ini->first_contact_local);
2005         return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
2006                      SMC_CLC_DECL_MEM;
2007     }
2008 
2009     return 0;
2010 }
2011 
2012 static bool smc_is_already_selected(struct smcd_dev *smcd,
2013                     struct smc_init_info *ini,
2014                     int matches)
2015 {
2016     int i;
2017 
2018     for (i = 0; i < matches; i++)
2019         if (smcd == ini->ism_dev[i])
2020             return true;
2021 
2022     return false;
2023 }
2024 
2025 /* check for ISM devices matching proposed ISM devices */
2026 static void smc_check_ism_v2_match(struct smc_init_info *ini,
2027                    u16 proposed_chid, u64 proposed_gid,
2028                    unsigned int *matches)
2029 {
2030     struct smcd_dev *smcd;
2031 
2032     list_for_each_entry(smcd, &smcd_dev_list.list, list) {
2033         if (smcd->going_away)
2034             continue;
2035         if (smc_is_already_selected(smcd, ini, *matches))
2036             continue;
2037         if (smc_ism_get_chid(smcd) == proposed_chid &&
2038             !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
2039             ini->ism_peer_gid[*matches] = proposed_gid;
2040             ini->ism_dev[*matches] = smcd;
2041             (*matches)++;
2042             break;
2043         }
2044     }
2045 }
2046 
2047 static void smc_find_ism_store_rc(u32 rc, struct smc_init_info *ini)
2048 {
2049     if (!ini->rc)
2050         ini->rc = rc;
2051 }
2052 
2053 static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
2054                     struct smc_clc_msg_proposal *pclc,
2055                     struct smc_init_info *ini)
2056 {
2057     struct smc_clc_smcd_v2_extension *smcd_v2_ext;
2058     struct smc_clc_v2_extension *smc_v2_ext;
2059     struct smc_clc_msg_smcd *pclc_smcd;
2060     unsigned int matches = 0;
2061     u8 smcd_version;
2062     u8 *eid = NULL;
2063     int i, rc;
2064 
2065     if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2))
2066         goto not_found;
2067 
2068     pclc_smcd = smc_get_clc_msg_smcd(pclc);
2069     smc_v2_ext = smc_get_clc_v2_ext(pclc);
2070     smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
2071 
2072     mutex_lock(&smcd_dev_list.mutex);
2073     if (pclc_smcd->ism.chid)
2074         /* check for ISM device matching proposed native ISM device */
2075         smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
2076                        ntohll(pclc_smcd->ism.gid), &matches);
2077     for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) {
2078         /* check for ISM devices matching proposed non-native ISM
2079          * devices
2080          */
2081         smc_check_ism_v2_match(ini,
2082                        ntohs(smcd_v2_ext->gidchid[i - 1].chid),
2083                        ntohll(smcd_v2_ext->gidchid[i - 1].gid),
2084                        &matches);
2085     }
2086     mutex_unlock(&smcd_dev_list.mutex);
2087 
2088     if (!ini->ism_dev[0]) {
2089         smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
2090         goto not_found;
2091     }
2092 
2093     smc_ism_get_system_eid(&eid);
2094     if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
2095                    smcd_v2_ext->system_eid, eid))
2096         goto not_found;
2097 
2098     /* separate - outside the smcd_dev_list.lock */
2099     smcd_version = ini->smcd_version;
2100     for (i = 0; i < matches; i++) {
2101         ini->smcd_version = SMC_V2;
2102         ini->is_smcd = true;
2103         ini->ism_selected = i;
2104         rc = smc_listen_ism_init(new_smc, ini);
2105         if (rc) {
2106             smc_find_ism_store_rc(rc, ini);
2107             /* try next active ISM device */
2108             continue;
2109         }
2110         return; /* matching and usable V2 ISM device found */
2111     }
2112     /* no V2 ISM device could be initialized */
2113     ini->smcd_version = smcd_version;   /* restore original value */
2114     ini->negotiated_eid[0] = 0;
2115 
2116 not_found:
2117     ini->smcd_version &= ~SMC_V2;
2118     ini->ism_dev[0] = NULL;
2119     ini->is_smcd = false;
2120 }
2121 
2122 static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
2123                     struct smc_clc_msg_proposal *pclc,
2124                     struct smc_init_info *ini)
2125 {
2126     struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);
2127     int rc = 0;
2128 
2129     /* check if ISM V1 is available */
2130     if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1))
2131         goto not_found;
2132     ini->is_smcd = true; /* prepare ISM check */
2133     ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid);
2134     rc = smc_find_ism_device(new_smc, ini);
2135     if (rc)
2136         goto not_found;
2137     ini->ism_selected = 0;
2138     rc = smc_listen_ism_init(new_smc, ini);
2139     if (!rc)
2140         return;     /* V1 ISM device found */
2141 
2142 not_found:
2143     smc_find_ism_store_rc(rc, ini);
2144     ini->smcd_version &= ~SMC_V1;
2145     ini->ism_dev[0] = NULL;
2146     ini->is_smcd = false;
2147 }
2148 
2149 /* listen worker: register buffers */
2150 static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
2151 {
2152     struct smc_connection *conn = &new_smc->conn;
2153 
2154     if (!local_first) {
2155         /* reg sendbufs if they were vzalloced */
2156         if (conn->sndbuf_desc->is_vm) {
2157             if (smcr_lgr_reg_sndbufs(conn->lnk,
2158                          conn->sndbuf_desc))
2159                 return SMC_CLC_DECL_ERR_REGBUF;
2160         }
2161         if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
2162             return SMC_CLC_DECL_ERR_REGBUF;
2163     }
2164 
2165     return 0;
2166 }
2167 
2168 static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
2169                      struct smc_clc_msg_proposal *pclc,
2170                      struct smc_init_info *ini)
2171 {
2172     struct smc_clc_v2_extension *smc_v2_ext;
2173     u8 smcr_version;
2174     int rc;
2175 
2176     if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2))
2177         goto not_found;
2178 
2179     smc_v2_ext = smc_get_clc_v2_ext(pclc);
2180     if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
2181         goto not_found;
2182 
2183     /* prepare RDMA check */
2184     memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
2185     memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE);
2186     memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
2187     ini->check_smcrv2 = true;
2188     ini->smcrv2.clc_sk = new_smc->clcsock->sk;
2189     ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
2190     ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
2191     rc = smc_find_rdma_device(new_smc, ini);
2192     if (rc) {
2193         smc_find_ism_store_rc(rc, ini);
2194         goto not_found;
2195     }
2196     if (!ini->smcrv2.uses_gateway)
2197         memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);
2198 
2199     smcr_version = ini->smcr_version;
2200     ini->smcr_version = SMC_V2;
2201     rc = smc_listen_rdma_init(new_smc, ini);
2202     if (!rc)
2203         rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
2204     if (!rc)
2205         return;
2206     ini->smcr_version = smcr_version;
2207     smc_find_ism_store_rc(rc, ini);
2208 
2209 not_found:
2210     ini->smcr_version &= ~SMC_V2;
2211     ini->smcrv2.ib_dev_v2 = NULL;
2212     ini->check_smcrv2 = false;
2213 }
2214 
2215 static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
2216                     struct smc_clc_msg_proposal *pclc,
2217                     struct smc_init_info *ini)
2218 {
2219     int rc;
2220 
2221     if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1))
2222         return SMC_CLC_DECL_NOSMCDEV;
2223 
2224     /* prepare RDMA check */
2225     memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
2226     memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE);
2227     memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
2228     rc = smc_find_rdma_device(new_smc, ini);
2229     if (rc) {
2230         /* no RDMA device found */
2231         return SMC_CLC_DECL_NOSMCDEV;
2232     }
2233     rc = smc_listen_rdma_init(new_smc, ini);
2234     if (rc)
2235         return rc;
2236     return smc_listen_rdma_reg(new_smc, ini->first_contact_local);
2237 }
2238 
2239 /* determine the local device matching to proposal */
2240 static int smc_listen_find_device(struct smc_sock *new_smc,
2241                   struct smc_clc_msg_proposal *pclc,
2242                   struct smc_init_info *ini)
2243 {
2244     int prfx_rc;
2245 
2246     /* check for ISM device matching V2 proposed device */
2247     smc_find_ism_v2_device_serv(new_smc, pclc, ini);
2248     if (ini->ism_dev[0])
2249         return 0;
2250 
2251     /* check for matching IP prefix and subnet length (V1) */
2252     prfx_rc = smc_listen_prfx_check(new_smc, pclc);
2253     if (prfx_rc)
2254         smc_find_ism_store_rc(prfx_rc, ini);
2255 
2256     /* get vlan id from IP device */
2257     if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
2258         return ini->rc ?: SMC_CLC_DECL_GETVLANERR;
2259 
2260     /* check for ISM device matching V1 proposed device */
2261     if (!prfx_rc)
2262         smc_find_ism_v1_device_serv(new_smc, pclc, ini);
2263     if (ini->ism_dev[0])
2264         return 0;
2265 
2266     if (!smcr_indicated(pclc->hdr.typev1) &&
2267         !smcr_indicated(pclc->hdr.typev2))
2268         /* skip RDMA and decline */
2269         return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;
2270 
2271     /* check if RDMA V2 is available */
2272     smc_find_rdma_v2_device_serv(new_smc, pclc, ini);
2273     if (ini->smcrv2.ib_dev_v2)
2274         return 0;
2275 
2276     /* check if RDMA V1 is available */
2277     if (!prfx_rc) {
2278         int rc;
2279 
2280         rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
2281         smc_find_ism_store_rc(rc, ini);
2282         return (!rc) ? 0 : ini->rc;
2283     }
2284     return SMC_CLC_DECL_NOSMCDEV;
2285 }
2286 
2287 /* listen worker: finish RDMA setup */
2288 static int smc_listen_rdma_finish(struct smc_sock *new_smc,
2289                   struct smc_clc_msg_accept_confirm *cclc,
2290                   bool local_first,
2291                   struct smc_init_info *ini)
2292 {
2293     struct smc_link *link = new_smc->conn.lnk;
2294     int reason_code = 0;
2295 
2296     if (local_first)
2297         smc_link_save_peer_info(link, cclc, ini);
2298 
2299     if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
2300         return SMC_CLC_DECL_ERR_RTOK;
2301 
2302     if (local_first) {
2303         if (smc_ib_ready_link(link))
2304             return SMC_CLC_DECL_ERR_RDYLNK;
2305         /* QP confirmation over RoCE fabric */
2306         smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
2307         reason_code = smcr_serv_conf_first_link(new_smc);
2308         smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
2309     }
2310     return reason_code;
2311 }
2312 
2313 /* setup for connection of server */
2314 static void smc_listen_work(struct work_struct *work)
2315 {
2316     struct smc_sock *new_smc = container_of(work, struct smc_sock,
2317                         smc_listen_work);
2318     struct socket *newclcsock = new_smc->clcsock;
2319     struct smc_clc_msg_accept_confirm *cclc;
2320     struct smc_clc_msg_proposal_area *buf;
2321     struct smc_clc_msg_proposal *pclc;
2322     struct smc_init_info *ini = NULL;
2323     u8 proposal_version = SMC_V1;
2324     u8 accept_version;
2325     int rc = 0;
2326 
2327     if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
2328         return smc_listen_out_err(new_smc);
2329 
2330     if (new_smc->use_fallback) {
2331         smc_listen_out_connected(new_smc);
2332         return;
2333     }
2334 
2335     /* check if peer is smc capable */
2336     if (!tcp_sk(newclcsock->sk)->syn_smc) {
2337         rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
2338         if (rc)
2339             smc_listen_out_err(new_smc);
2340         else
2341             smc_listen_out_connected(new_smc);
2342         return;
2343     }
2344 
2345     /* do inband token exchange -
2346      * wait for and receive SMC Proposal CLC message
2347      */
2348     buf = kzalloc(sizeof(*buf), GFP_KERNEL);
2349     if (!buf) {
2350         rc = SMC_CLC_DECL_MEM;
2351         goto out_decl;
2352     }
2353     pclc = (struct smc_clc_msg_proposal *)buf;
2354     rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf),
2355                   SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
2356     if (rc)
2357         goto out_decl;
2358 
2359     if (pclc->hdr.version > SMC_V1)
2360         proposal_version = SMC_V2;
2361 
2362     /* IPSec connections opt out of SMC optimizations */
2363     if (using_ipsec(new_smc)) {
2364         rc = SMC_CLC_DECL_IPSEC;
2365         goto out_decl;
2366     }
2367 
2368     ini = kzalloc(sizeof(*ini), GFP_KERNEL);
2369     if (!ini) {
2370         rc = SMC_CLC_DECL_MEM;
2371         goto out_decl;
2372     }
2373 
2374     /* initial version checking */
2375     rc = smc_listen_v2_check(new_smc, pclc, ini);
2376     if (rc)
2377         goto out_decl;
2378 
2379     mutex_lock(&smc_server_lgr_pending);
2380     smc_close_init(new_smc);
2381     smc_rx_init(new_smc);
2382     smc_tx_init(new_smc);
2383 
2384     /* determine ISM or RoCE device used for connection */
2385     rc = smc_listen_find_device(new_smc, pclc, ini);
2386     if (rc)
2387         goto out_unlock;
2388 
2389     /* send SMC Accept CLC message */
2390     accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
2391     rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
2392                  accept_version, ini->negotiated_eid);
2393     if (rc)
2394         goto out_unlock;
2395 
2396     /* SMC-D does not need this lock any more */
2397     if (ini->is_smcd)
2398         mutex_unlock(&smc_server_lgr_pending);
2399 
2400     /* receive SMC Confirm CLC message */
2401     memset(buf, 0, sizeof(*buf));
2402     cclc = (struct smc_clc_msg_accept_confirm *)buf;
2403     rc = smc_clc_wait_msg(new_smc, cclc, sizeof(*buf),
2404                   SMC_CLC_CONFIRM, CLC_WAIT_TIME);
2405     if (rc) {
2406         if (!ini->is_smcd)
2407             goto out_unlock;
2408         goto out_decl;
2409     }
2410 
2411     /* finish worker */
2412     if (!ini->is_smcd) {
2413         rc = smc_listen_rdma_finish(new_smc, cclc,
2414                         ini->first_contact_local, ini);
2415         if (rc)
2416             goto out_unlock;
2417         mutex_unlock(&smc_server_lgr_pending);
2418     }
2419     smc_conn_save_peer_info(new_smc, cclc);
2420     smc_listen_out_connected(new_smc);
2421     SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
2422     goto out_free;
2423 
2424 out_unlock:
2425     mutex_unlock(&smc_server_lgr_pending);
2426 out_decl:
2427     smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
2428                proposal_version);
2429 out_free:
2430     kfree(ini);
2431     kfree(buf);
2432 }
2433 
2434 static void smc_tcp_listen_work(struct work_struct *work)
2435 {
2436     struct smc_sock *lsmc = container_of(work, struct smc_sock,
2437                          tcp_listen_work);
2438     struct sock *lsk = &lsmc->sk;
2439     struct smc_sock *new_smc;
2440     int rc = 0;
2441 
2442     lock_sock(lsk);
2443     while (lsk->sk_state == SMC_LISTEN) {
2444         rc = smc_clcsock_accept(lsmc, &new_smc);
2445         if (rc) /* clcsock accept queue empty or error */
2446             goto out;
2447         if (!new_smc)
2448             continue;
2449 
2450         if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
2451             atomic_inc(&lsmc->queued_smc_hs);
2452 
2453         new_smc->listen_smc = lsmc;
2454         new_smc->use_fallback = lsmc->use_fallback;
2455         new_smc->fallback_rsn = lsmc->fallback_rsn;
2456         sock_hold(lsk); /* sock_put in smc_listen_work */
2457         INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
2458         smc_copy_sock_settings_to_smc(new_smc);
2459         new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
2460         new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
2461         sock_hold(&new_smc->sk); /* sock_put in passive closing */
2462         if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
2463             sock_put(&new_smc->sk);
2464     }
2465 
2466 out:
2467     release_sock(lsk);
2468     sock_put(&lsmc->sk); /* sock_hold in smc_clcsock_data_ready() */
2469 }
2470 
2471 static void smc_clcsock_data_ready(struct sock *listen_clcsock)
2472 {
2473     struct smc_sock *lsmc;
2474 
2475     read_lock_bh(&listen_clcsock->sk_callback_lock);
2476     lsmc = smc_clcsock_user_data(listen_clcsock);
2477     if (!lsmc)
2478         goto out;
2479     lsmc->clcsk_data_ready(listen_clcsock);
2480     if (lsmc->sk.sk_state == SMC_LISTEN) {
2481         sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
2482         if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work))
2483             sock_put(&lsmc->sk);
2484     }
2485 out:
2486     read_unlock_bh(&listen_clcsock->sk_callback_lock);
2487 }
2488 
2489 static int smc_listen(struct socket *sock, int backlog)
2490 {
2491     struct sock *sk = sock->sk;
2492     struct smc_sock *smc;
2493     int rc;
2494 
2495     smc = smc_sk(sk);
2496     lock_sock(sk);
2497 
2498     rc = -EINVAL;
2499     if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
2500         smc->connect_nonblock || sock->state != SS_UNCONNECTED)
2501         goto out;
2502 
2503     rc = 0;
2504     if (sk->sk_state == SMC_LISTEN) {
2505         sk->sk_max_ack_backlog = backlog;
2506         goto out;
2507     }
2508     /* some socket options are handled in core, so we could not apply
2509      * them to the clc socket -- copy smc socket options to clc socket
2510      */
2511     smc_copy_sock_settings_to_clc(smc);
2512     if (!smc->use_fallback)
2513         tcp_sk(smc->clcsock->sk)->syn_smc = 1;
2514 
2515     /* save original sk_data_ready function and establish
2516      * smc-specific sk_data_ready function
2517      */
2518     write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
2519     smc->clcsock->sk->sk_user_data =
2520         (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
2521     smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready,
2522                    smc_clcsock_data_ready, &smc->clcsk_data_ready);
2523     write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
2524 
2525     /* save original ops */
2526     smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops;
2527 
2528     smc->af_ops = *smc->ori_af_ops;
2529     smc->af_ops.syn_recv_sock = smc_tcp_syn_recv_sock;
2530 
2531     inet_csk(smc->clcsock->sk)->icsk_af_ops = &smc->af_ops;
2532 
2533     if (smc->limit_smc_hs)
2534         tcp_sk(smc->clcsock->sk)->smc_hs_congested = smc_hs_congested;
2535 
2536     rc = kernel_listen(smc->clcsock, backlog);
2537     if (rc) {
2538         write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
2539         smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
2540                        &smc->clcsk_data_ready);
2541         smc->clcsock->sk->sk_user_data = NULL;
2542         write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
2543         goto out;
2544     }
2545     sk->sk_max_ack_backlog = backlog;
2546     sk->sk_ack_backlog = 0;
2547     sk->sk_state = SMC_LISTEN;
2548 
2549 out:
2550     release_sock(sk);
2551     return rc;
2552 }
2553 
2554 static int smc_accept(struct socket *sock, struct socket *new_sock,
2555               int flags, bool kern)
2556 {
2557     struct sock *sk = sock->sk, *nsk;
2558     DECLARE_WAITQUEUE(wait, current);
2559     struct smc_sock *lsmc;
2560     long timeo;
2561     int rc = 0;
2562 
2563     lsmc = smc_sk(sk);
2564     sock_hold(sk); /* sock_put below */
2565     lock_sock(sk);
2566 
2567     if (lsmc->sk.sk_state != SMC_LISTEN) {
2568         rc = -EINVAL;
2569         release_sock(sk);
2570         goto out;
2571     }
2572 
2573     /* Wait for an incoming connection */
2574     timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
2575     add_wait_queue_exclusive(sk_sleep(sk), &wait);
2576     while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
2577         set_current_state(TASK_INTERRUPTIBLE);
2578         if (!timeo) {
2579             rc = -EAGAIN;
2580             break;
2581         }
2582         release_sock(sk);
2583         timeo = schedule_timeout(timeo);
2584         /* wakeup by sk_data_ready in smc_listen_work() */
2585         sched_annotate_sleep();
2586         lock_sock(sk);
2587         if (signal_pending(current)) {
2588             rc = sock_intr_errno(timeo);
2589             break;
2590         }
2591     }
2592     set_current_state(TASK_RUNNING);
2593     remove_wait_queue(sk_sleep(sk), &wait);
2594 
2595     if (!rc)
2596         rc = sock_error(nsk);
2597     release_sock(sk);
2598     if (rc)
2599         goto out;
2600 
2601     if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
2602         /* wait till data arrives on the socket */
2603         timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
2604                                 MSEC_PER_SEC);
2605         if (smc_sk(nsk)->use_fallback) {
2606             struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
2607 
2608             lock_sock(clcsk);
2609             if (skb_queue_empty(&clcsk->sk_receive_queue))
2610                 sk_wait_data(clcsk, &timeo, NULL);
2611             release_sock(clcsk);
2612         } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
2613             lock_sock(nsk);
2614             smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
2615             release_sock(nsk);
2616         }
2617     }
2618 
2619 out:
2620     sock_put(sk); /* sock_hold above */
2621     return rc;
2622 }
2623 
2624 static int smc_getname(struct socket *sock, struct sockaddr *addr,
2625                int peer)
2626 {
2627     struct smc_sock *smc;
2628 
2629     if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
2630         (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
2631         return -ENOTCONN;
2632 
2633     smc = smc_sk(sock->sk);
2634 
2635     return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
2636 }
2637 
2638 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
2639 {
2640     struct sock *sk = sock->sk;
2641     struct smc_sock *smc;
2642     int rc = -EPIPE;
2643 
2644     smc = smc_sk(sk);
2645     lock_sock(sk);
2646     if ((sk->sk_state != SMC_ACTIVE) &&
2647         (sk->sk_state != SMC_APPCLOSEWAIT1) &&
2648         (sk->sk_state != SMC_INIT))
2649         goto out;
2650 
2651     if (msg->msg_flags & MSG_FASTOPEN) {
2652         if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
2653             rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
2654             if (rc)
2655                 goto out;
2656         } else {
2657             rc = -EINVAL;
2658             goto out;
2659         }
2660     }
2661 
2662     if (smc->use_fallback) {
2663         rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
2664     } else {
2665         rc = smc_tx_sendmsg(smc, msg, len);
2666         SMC_STAT_TX_PAYLOAD(smc, len, rc);
2667     }
2668 out:
2669     release_sock(sk);
2670     return rc;
2671 }
2672 
2673 static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
2674                int flags)
2675 {
2676     struct sock *sk = sock->sk;
2677     struct smc_sock *smc;
2678     int rc = -ENOTCONN;
2679 
2680     smc = smc_sk(sk);
2681     lock_sock(sk);
2682     if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
2683         /* socket was connected before, no more data to read */
2684         rc = 0;
2685         goto out;
2686     }
2687     if ((sk->sk_state == SMC_INIT) ||
2688         (sk->sk_state == SMC_LISTEN) ||
2689         (sk->sk_state == SMC_CLOSED))
2690         goto out;
2691 
2692     if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
2693         rc = 0;
2694         goto out;
2695     }
2696 
2697     if (smc->use_fallback) {
2698         rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
2699     } else {
2700         msg->msg_namelen = 0;
2701         rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
2702         SMC_STAT_RX_PAYLOAD(smc, rc, rc);
2703     }
2704 
2705 out:
2706     release_sock(sk);
2707     return rc;
2708 }
2709 
2710 static __poll_t smc_accept_poll(struct sock *parent)
2711 {
2712     struct smc_sock *isk = smc_sk(parent);
2713     __poll_t mask = 0;
2714 
2715     spin_lock(&isk->accept_q_lock);
2716     if (!list_empty(&isk->accept_q))
2717         mask = EPOLLIN | EPOLLRDNORM;
2718     spin_unlock(&isk->accept_q_lock);
2719 
2720     return mask;
2721 }
2722 
2723 static __poll_t smc_poll(struct file *file, struct socket *sock,
2724                  poll_table *wait)
2725 {
2726     struct sock *sk = sock->sk;
2727     struct smc_sock *smc;
2728     __poll_t mask = 0;
2729 
2730     if (!sk)
2731         return EPOLLNVAL;
2732 
2733     smc = smc_sk(sock->sk);
2734     if (smc->use_fallback) {
2735         /* delegate to CLC child sock */
2736         mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
2737         sk->sk_err = smc->clcsock->sk->sk_err;
2738     } else {
2739         if (sk->sk_state != SMC_CLOSED)
2740             sock_poll_wait(file, sock, wait);
2741         if (sk->sk_err)
2742             mask |= EPOLLERR;
2743         if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
2744             (sk->sk_state == SMC_CLOSED))
2745             mask |= EPOLLHUP;
2746         if (sk->sk_state == SMC_LISTEN) {
2747             /* woken up by sk_data_ready in smc_listen_work() */
2748             mask |= smc_accept_poll(sk);
2749         } else if (smc->use_fallback) { /* as result of connect_work()*/
2750             mask |= smc->clcsock->ops->poll(file, smc->clcsock,
2751                                wait);
2752             sk->sk_err = smc->clcsock->sk->sk_err;
2753         } else {
2754             if ((sk->sk_state != SMC_INIT &&
2755                  atomic_read(&smc->conn.sndbuf_space)) ||
2756                 sk->sk_shutdown & SEND_SHUTDOWN) {
2757                 mask |= EPOLLOUT | EPOLLWRNORM;
2758             } else {
2759                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2760                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2761             }
2762             if (atomic_read(&smc->conn.bytes_to_rcv))
2763                 mask |= EPOLLIN | EPOLLRDNORM;
2764             if (sk->sk_shutdown & RCV_SHUTDOWN)
2765                 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
2766             if (sk->sk_state == SMC_APPCLOSEWAIT1)
2767                 mask |= EPOLLIN;
2768             if (smc->conn.urg_state == SMC_URG_VALID)
2769                 mask |= EPOLLPRI;
2770         }
2771     }
2772 
2773     return mask;
2774 }
2775 
2776 static int smc_shutdown(struct socket *sock, int how)
2777 {
2778     struct sock *sk = sock->sk;
2779     bool do_shutdown = true;
2780     struct smc_sock *smc;
2781     int rc = -EINVAL;
2782     int old_state;
2783     int rc1 = 0;
2784 
2785     smc = smc_sk(sk);
2786 
2787     if ((how < SHUT_RD) || (how > SHUT_RDWR))
2788         return rc;
2789 
2790     lock_sock(sk);
2791 
2792     if (sock->state == SS_CONNECTING) {
2793         if (sk->sk_state == SMC_ACTIVE)
2794             sock->state = SS_CONNECTED;
2795         else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
2796              sk->sk_state == SMC_PEERCLOSEWAIT2 ||
2797              sk->sk_state == SMC_APPCLOSEWAIT1 ||
2798              sk->sk_state == SMC_APPCLOSEWAIT2 ||
2799              sk->sk_state == SMC_APPFINCLOSEWAIT)
2800             sock->state = SS_DISCONNECTING;
2801     }
2802 
2803     rc = -ENOTCONN;
2804     if ((sk->sk_state != SMC_ACTIVE) &&
2805         (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
2806         (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
2807         (sk->sk_state != SMC_APPCLOSEWAIT1) &&
2808         (sk->sk_state != SMC_APPCLOSEWAIT2) &&
2809         (sk->sk_state != SMC_APPFINCLOSEWAIT))
2810         goto out;
2811     if (smc->use_fallback) {
2812         rc = kernel_sock_shutdown(smc->clcsock, how);
2813         sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
2814         if (sk->sk_shutdown == SHUTDOWN_MASK) {
2815             sk->sk_state = SMC_CLOSED;
2816             sk->sk_socket->state = SS_UNCONNECTED;
2817             sock_put(sk);
2818         }
2819         goto out;
2820     }
2821     switch (how) {
2822     case SHUT_RDWR:     /* shutdown in both directions */
2823         old_state = sk->sk_state;
2824         rc = smc_close_active(smc);
2825         if (old_state == SMC_ACTIVE &&
2826             sk->sk_state == SMC_PEERCLOSEWAIT1)
2827             do_shutdown = false;
2828         break;
2829     case SHUT_WR:
2830         rc = smc_close_shutdown_write(smc);
2831         break;
2832     case SHUT_RD:
2833         rc = 0;
2834         /* nothing more to do because peer is not involved */
2835         break;
2836     }
2837     if (do_shutdown && smc->clcsock)
2838         rc1 = kernel_sock_shutdown(smc->clcsock, how);
2839     /* map sock_shutdown_cmd constants to sk_shutdown value range */
2840     sk->sk_shutdown |= how + 1;
2841 
2842     if (sk->sk_state == SMC_CLOSED)
2843         sock->state = SS_UNCONNECTED;
2844     else
2845         sock->state = SS_DISCONNECTING;
2846 out:
2847     release_sock(sk);
2848     return rc ? rc : rc1;
2849 }
2850 
2851 static int __smc_getsockopt(struct socket *sock, int level, int optname,
2852                 char __user *optval, int __user *optlen)
2853 {
2854     struct smc_sock *smc;
2855     int val, len;
2856 
2857     smc = smc_sk(sock->sk);
2858 
2859     if (get_user(len, optlen))
2860         return -EFAULT;
2861 
2862     len = min_t(int, len, sizeof(int));
2863 
2864     if (len < 0)
2865         return -EINVAL;
2866 
2867     switch (optname) {
2868     case SMC_LIMIT_HS:
2869         val = smc->limit_smc_hs;
2870         break;
2871     default:
2872         return -EOPNOTSUPP;
2873     }
2874 
2875     if (put_user(len, optlen))
2876         return -EFAULT;
2877     if (copy_to_user(optval, &val, len))
2878         return -EFAULT;
2879 
2880     return 0;
2881 }
2882 
2883 static int __smc_setsockopt(struct socket *sock, int level, int optname,
2884                 sockptr_t optval, unsigned int optlen)
2885 {
2886     struct sock *sk = sock->sk;
2887     struct smc_sock *smc;
2888     int val, rc;
2889 
2890     smc = smc_sk(sk);
2891 
2892     lock_sock(sk);
2893     switch (optname) {
2894     case SMC_LIMIT_HS:
2895         if (optlen < sizeof(int)) {
2896             rc = -EINVAL;
2897             break;
2898         }
2899         if (copy_from_sockptr(&val, optval, sizeof(int))) {
2900             rc = -EFAULT;
2901             break;
2902         }
2903 
2904         smc->limit_smc_hs = !!val;
2905         rc = 0;
2906         break;
2907     default:
2908         rc = -EOPNOTSUPP;
2909         break;
2910     }
2911     release_sock(sk);
2912 
2913     return rc;
2914 }
2915 
2916 static int smc_setsockopt(struct socket *sock, int level, int optname,
2917               sockptr_t optval, unsigned int optlen)
2918 {
2919     struct sock *sk = sock->sk;
2920     struct smc_sock *smc;
2921     int val, rc;
2922 
2923     if (level == SOL_TCP && optname == TCP_ULP)
2924         return -EOPNOTSUPP;
2925     else if (level == SOL_SMC)
2926         return __smc_setsockopt(sock, level, optname, optval, optlen);
2927 
2928     smc = smc_sk(sk);
2929 
2930     /* generic setsockopts reaching us here always apply to the
2931      * CLC socket
2932      */
2933     mutex_lock(&smc->clcsock_release_lock);
2934     if (!smc->clcsock) {
2935         mutex_unlock(&smc->clcsock_release_lock);
2936         return -EBADF;
2937     }
2938     if (unlikely(!smc->clcsock->ops->setsockopt))
2939         rc = -EOPNOTSUPP;
2940     else
2941         rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
2942                            optval, optlen);
2943     if (smc->clcsock->sk->sk_err) {
2944         sk->sk_err = smc->clcsock->sk->sk_err;
2945         sk_error_report(sk);
2946     }
2947     mutex_unlock(&smc->clcsock_release_lock);
2948 
2949     if (optlen < sizeof(int))
2950         return -EINVAL;
2951     if (copy_from_sockptr(&val, optval, sizeof(int)))
2952         return -EFAULT;
2953 
2954     lock_sock(sk);
2955     if (rc || smc->use_fallback)
2956         goto out;
2957     switch (optname) {
2958     case TCP_FASTOPEN:
2959     case TCP_FASTOPEN_CONNECT:
2960     case TCP_FASTOPEN_KEY:
2961     case TCP_FASTOPEN_NO_COOKIE:
2962         /* option not supported by SMC */
2963         if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
2964             rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
2965         } else {
2966             rc = -EINVAL;
2967         }
2968         break;
2969     case TCP_NODELAY:
2970         if (sk->sk_state != SMC_INIT &&
2971             sk->sk_state != SMC_LISTEN &&
2972             sk->sk_state != SMC_CLOSED) {
2973             if (val) {
2974                 SMC_STAT_INC(smc, ndly_cnt);
2975                 smc_tx_pending(&smc->conn);
2976                 cancel_delayed_work(&smc->conn.tx_work);
2977             }
2978         }
2979         break;
2980     case TCP_CORK:
2981         if (sk->sk_state != SMC_INIT &&
2982             sk->sk_state != SMC_LISTEN &&
2983             sk->sk_state != SMC_CLOSED) {
2984             if (!val) {
2985                 SMC_STAT_INC(smc, cork_cnt);
2986                 smc_tx_pending(&smc->conn);
2987                 cancel_delayed_work(&smc->conn.tx_work);
2988             }
2989         }
2990         break;
2991     case TCP_DEFER_ACCEPT:
2992         smc->sockopt_defer_accept = val;
2993         break;
2994     default:
2995         break;
2996     }
2997 out:
2998     release_sock(sk);
2999 
3000     return rc;
3001 }
3002 
3003 static int smc_getsockopt(struct socket *sock, int level, int optname,
3004               char __user *optval, int __user *optlen)
3005 {
3006     struct smc_sock *smc;
3007     int rc;
3008 
3009     if (level == SOL_SMC)
3010         return __smc_getsockopt(sock, level, optname, optval, optlen);
3011 
3012     smc = smc_sk(sock->sk);
3013     mutex_lock(&smc->clcsock_release_lock);
3014     if (!smc->clcsock) {
3015         mutex_unlock(&smc->clcsock_release_lock);
3016         return -EBADF;
3017     }
3018     /* socket options apply to the CLC socket */
3019     if (unlikely(!smc->clcsock->ops->getsockopt)) {
3020         mutex_unlock(&smc->clcsock_release_lock);
3021         return -EOPNOTSUPP;
3022     }
3023     rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
3024                        optval, optlen);
3025     mutex_unlock(&smc->clcsock_release_lock);
3026     return rc;
3027 }
3028 
3029 static int smc_ioctl(struct socket *sock, unsigned int cmd,
3030              unsigned long arg)
3031 {
3032     union smc_host_cursor cons, urg;
3033     struct smc_connection *conn;
3034     struct smc_sock *smc;
3035     int answ;
3036 
3037     smc = smc_sk(sock->sk);
3038     conn = &smc->conn;
3039     lock_sock(&smc->sk);
3040     if (smc->use_fallback) {
3041         if (!smc->clcsock) {
3042             release_sock(&smc->sk);
3043             return -EBADF;
3044         }
3045         answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
3046         release_sock(&smc->sk);
3047         return answ;
3048     }
3049     switch (cmd) {
3050     case SIOCINQ: /* same as FIONREAD */
3051         if (smc->sk.sk_state == SMC_LISTEN) {
3052             release_sock(&smc->sk);
3053             return -EINVAL;
3054         }
3055         if (smc->sk.sk_state == SMC_INIT ||
3056             smc->sk.sk_state == SMC_CLOSED)
3057             answ = 0;
3058         else
3059             answ = atomic_read(&smc->conn.bytes_to_rcv);
3060         break;
3061     case SIOCOUTQ:
3062         /* output queue size (not send + not acked) */
3063         if (smc->sk.sk_state == SMC_LISTEN) {
3064             release_sock(&smc->sk);
3065             return -EINVAL;
3066         }
3067         if (smc->sk.sk_state == SMC_INIT ||
3068             smc->sk.sk_state == SMC_CLOSED)
3069             answ = 0;
3070         else
3071             answ = smc->conn.sndbuf_desc->len -
3072                     atomic_read(&smc->conn.sndbuf_space);
3073         break;
3074     case SIOCOUTQNSD:
3075         /* output queue size (not send only) */
3076         if (smc->sk.sk_state == SMC_LISTEN) {
3077             release_sock(&smc->sk);
3078             return -EINVAL;
3079         }
3080         if (smc->sk.sk_state == SMC_INIT ||
3081             smc->sk.sk_state == SMC_CLOSED)
3082             answ = 0;
3083         else
3084             answ = smc_tx_prepared_sends(&smc->conn);
3085         break;
3086     case SIOCATMARK:
3087         if (smc->sk.sk_state == SMC_LISTEN) {
3088             release_sock(&smc->sk);
3089             return -EINVAL;
3090         }
3091         if (smc->sk.sk_state == SMC_INIT ||
3092             smc->sk.sk_state == SMC_CLOSED) {
3093             answ = 0;
3094         } else {
3095             smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
3096             smc_curs_copy(&urg, &conn->urg_curs, conn);
3097             answ = smc_curs_diff(conn->rmb_desc->len,
3098                          &cons, &urg) == 1;
3099         }
3100         break;
3101     default:
3102         release_sock(&smc->sk);
3103         return -ENOIOCTLCMD;
3104     }
3105     release_sock(&smc->sk);
3106 
3107     return put_user(answ, (int __user *)arg);
3108 }
3109 
3110 static ssize_t smc_sendpage(struct socket *sock, struct page *page,
3111                 int offset, size_t size, int flags)
3112 {
3113     struct sock *sk = sock->sk;
3114     struct smc_sock *smc;
3115     int rc = -EPIPE;
3116 
3117     smc = smc_sk(sk);
3118     lock_sock(sk);
3119     if (sk->sk_state != SMC_ACTIVE) {
3120         release_sock(sk);
3121         goto out;
3122     }
3123     release_sock(sk);
3124     if (smc->use_fallback) {
3125         rc = kernel_sendpage(smc->clcsock, page, offset,
3126                      size, flags);
3127     } else {
3128         lock_sock(sk);
3129         rc = smc_tx_sendpage(smc, page, offset, size, flags);
3130         release_sock(sk);
3131         SMC_STAT_INC(smc, sendpage_cnt);
3132     }
3133 
3134 out:
3135     return rc;
3136 }
3137 
3138 /* Map the affected portions of the rmbe into an spd, note the number of bytes
3139  * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
3140  * updates till whenever a respective page has been fully processed.
3141  * Note that subsequent recv() calls have to wait till all splice() processing
3142  * completed.
3143  */
3144 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
3145                    struct pipe_inode_info *pipe, size_t len,
3146                    unsigned int flags)
3147 {
3148     struct sock *sk = sock->sk;
3149     struct smc_sock *smc;
3150     int rc = -ENOTCONN;
3151 
3152     smc = smc_sk(sk);
3153     lock_sock(sk);
3154     if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
3155         /* socket was connected before, no more data to read */
3156         rc = 0;
3157         goto out;
3158     }
3159     if (sk->sk_state == SMC_INIT ||
3160         sk->sk_state == SMC_LISTEN ||
3161         sk->sk_state == SMC_CLOSED)
3162         goto out;
3163 
3164     if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
3165         rc = 0;
3166         goto out;
3167     }
3168 
3169     if (smc->use_fallback) {
3170         rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
3171                             pipe, len, flags);
3172     } else {
3173         if (*ppos) {
3174             rc = -ESPIPE;
3175             goto out;
3176         }
3177         if (flags & SPLICE_F_NONBLOCK)
3178             flags = MSG_DONTWAIT;
3179         else
3180             flags = 0;
3181         SMC_STAT_INC(smc, splice_cnt);
3182         rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
3183     }
3184 out:
3185     release_sock(sk);
3186 
3187     return rc;
3188 }
3189 
3190 /* must look like tcp */
3191 static const struct proto_ops smc_sock_ops = {
3192     .family     = PF_SMC,
3193     .owner      = THIS_MODULE,
3194     .release    = smc_release,
3195     .bind       = smc_bind,
3196     .connect    = smc_connect,
3197     .socketpair = sock_no_socketpair,
3198     .accept     = smc_accept,
3199     .getname    = smc_getname,
3200     .poll       = smc_poll,
3201     .ioctl      = smc_ioctl,
3202     .listen     = smc_listen,
3203     .shutdown   = smc_shutdown,
3204     .setsockopt = smc_setsockopt,
3205     .getsockopt = smc_getsockopt,
3206     .sendmsg    = smc_sendmsg,
3207     .recvmsg    = smc_recvmsg,
3208     .mmap       = sock_no_mmap,
3209     .sendpage   = smc_sendpage,
3210     .splice_read    = smc_splice_read,
3211 };
3212 
3213 static int __smc_create(struct net *net, struct socket *sock, int protocol,
3214             int kern, struct socket *clcsock)
3215 {
3216     int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
3217     struct smc_sock *smc;
3218     struct sock *sk;
3219     int rc;
3220 
3221     rc = -ESOCKTNOSUPPORT;
3222     if (sock->type != SOCK_STREAM)
3223         goto out;
3224 
3225     rc = -EPROTONOSUPPORT;
3226     if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
3227         goto out;
3228 
3229     rc = -ENOBUFS;
3230     sock->ops = &smc_sock_ops;
3231     sock->state = SS_UNCONNECTED;
3232     sk = smc_sock_alloc(net, sock, protocol);
3233     if (!sk)
3234         goto out;
3235 
3236     /* create internal TCP socket for CLC handshake and fallback */
3237     smc = smc_sk(sk);
3238     smc->use_fallback = false; /* assume rdma capability first */
3239     smc->fallback_rsn = 0;
3240 
3241     /* default behavior from limit_smc_hs in every net namespace */
3242     smc->limit_smc_hs = net->smc.limit_smc_hs;
3243 
3244     rc = 0;
3245     if (!clcsock) {
3246         rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
3247                       &smc->clcsock);
3248         if (rc) {
3249             sk_common_release(sk);
3250             goto out;
3251         }
3252     } else {
3253         smc->clcsock = clcsock;
3254     }
3255 
3256     smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
3257     smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
3258 
3259 out:
3260     return rc;
3261 }
3262 
/* Create callback of the PF_SMC socket family (see smc_sock_family_ops).
 * Passes NULL as clcsock so that __smc_create() creates the internal TCP
 * socket itself. Returns the result of __smc_create().
 */
3263 static int smc_create(struct net *net, struct socket *sock, int protocol,
3264               int kern)
3265 {
3266     return __smc_create(net, sock, protocol, kern, NULL);
3267 }
3268 
3269 static const struct net_proto_family smc_sock_family_ops = {
3270     .family = PF_SMC,
3271     .owner  = THIS_MODULE,
3272     .create = smc_create,
3273 };
3274 
3275 static int smc_ulp_init(struct sock *sk)
3276 {
3277     struct socket *tcp = sk->sk_socket;
3278     struct net *net = sock_net(sk);
3279     struct socket *smcsock;
3280     int protocol, ret;
3281 
3282     /* only TCP can be replaced */
3283     if (tcp->type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP ||
3284         (sk->sk_family != AF_INET && sk->sk_family != AF_INET6))
3285         return -ESOCKTNOSUPPORT;
3286     /* don't handle wq now */
3287     if (tcp->state != SS_UNCONNECTED || !tcp->file || tcp->wq.fasync_list)
3288         return -ENOTCONN;
3289 
3290     if (sk->sk_family == AF_INET)
3291         protocol = SMCPROTO_SMC;
3292     else
3293         protocol = SMCPROTO_SMC6;
3294 
3295     smcsock = sock_alloc();
3296     if (!smcsock)
3297         return -ENFILE;
3298 
3299     smcsock->type = SOCK_STREAM;
3300     __module_get(THIS_MODULE); /* tried in __tcp_ulp_find_autoload */
3301     ret = __smc_create(net, smcsock, protocol, 1, tcp);
3302     if (ret) {
3303         sock_release(smcsock); /* module_put() which ops won't be NULL */
3304         return ret;
3305     }
3306 
3307     /* replace tcp socket to smc */
3308     smcsock->file = tcp->file;
3309     smcsock->file->private_data = smcsock;
3310     smcsock->file->f_inode = SOCK_INODE(smcsock); /* replace inode when sock_close */
3311     smcsock->file->f_path.dentry->d_inode = SOCK_INODE(smcsock); /* dput() in __fput */
3312     tcp->file = NULL;
3313 
3314     return ret;
3315 }
3316 
3317 static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
3318               const gfp_t priority)
3319 {
3320     struct inet_connection_sock *icsk = inet_csk(newsk);
3321 
3322     /* don't inherit ulp ops to child when listen */
3323     icsk->icsk_ulp_ops = NULL;
3324 }
3325 
3326 static struct tcp_ulp_ops smc_ulp_ops __read_mostly = {
3327     .name       = "smc",
3328     .owner      = THIS_MODULE,
3329     .init       = smc_ulp_init,
3330     .clone      = smc_ulp_clone,
3331 };
3332 
/* Id slot of the SMC per-net-namespace data; referenced by smc_net_ops.id. */
3333 unsigned int smc_net_id;
3334 
3335 static __net_init int smc_net_init(struct net *net)
3336 {
3337     int rc;
3338 
3339     rc = smc_sysctl_net_init(net);
3340     if (rc)
3341         return rc;
3342     return smc_pnet_net_init(net);
3343 }
3344 
/* Per-net-namespace exit: calls the sysctl and pnet exit routines for @net,
 * the counterparts of the init routines called in smc_net_init().
 */
3345 static void __net_exit smc_net_exit(struct net *net)
3346 {
3347     smc_sysctl_net_exit(net);
3348     smc_pnet_net_exit(net);
3349 }
3350 
/* Per-net-namespace init of the SMC statistics; returns the result of
 * smc_stats_init().
 */
3351 static __net_init int smc_net_stat_init(struct net *net)
3352 {
3353     return smc_stats_init(net);
3354 }
3355 
/* Per-net-namespace exit of the SMC statistics; counterpart of
 * smc_net_stat_init().
 */
3356 static void __net_exit smc_net_stat_exit(struct net *net)
3357 {
3358     smc_stats_exit(net);
3359 }
3360 
3361 static struct pernet_operations smc_net_ops = {
3362     .init = smc_net_init,
3363     .exit = smc_net_exit,
3364     .id   = &smc_net_id,
3365     .size = sizeof(struct smc_net),
3366 };
3367 
3368 static struct pernet_operations smc_net_stat_ops = {
3369     .init = smc_net_stat_init,
3370     .exit = smc_net_stat_exit,
3371 };
3372 
3373 static int __init smc_init(void)
3374 {
3375     int rc;
3376 
3377     rc = register_pernet_subsys(&smc_net_ops);
3378     if (rc)
3379         return rc;
3380 
3381     rc = register_pernet_subsys(&smc_net_stat_ops);
3382     if (rc)
3383         return rc;
3384 
3385     smc_ism_init();
3386     smc_clc_init();
3387 
3388     rc = smc_nl_init();
3389     if (rc)
3390         goto out_pernet_subsys;
3391 
3392     rc = smc_pnet_init();
3393     if (rc)
3394         goto out_nl;
3395 
3396     rc = -ENOMEM;
3397 
3398     smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", 0, 0);
3399     if (!smc_tcp_ls_wq)
3400         goto out_pnet;
3401 
3402     smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
3403     if (!smc_hs_wq)
3404         goto out_alloc_tcp_ls_wq;
3405 
3406     smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0);
3407     if (!smc_close_wq)
3408         goto out_alloc_hs_wq;
3409 
3410     rc = smc_core_init();
3411     if (rc) {
3412         pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
3413         goto out_alloc_wqs;
3414     }
3415 
3416     rc = smc_llc_init();
3417     if (rc) {
3418         pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
3419         goto out_core;
3420     }
3421 
3422     rc = smc_cdc_init();
3423     if (rc) {
3424         pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
3425         goto out_core;
3426     }
3427 
3428     rc = proto_register(&smc_proto, 1);
3429     if (rc) {
3430         pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
3431         goto out_core;
3432     }
3433 
3434     rc = proto_register(&smc_proto6, 1);
3435     if (rc) {
3436         pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
3437         goto out_proto;
3438     }
3439 
3440     rc = sock_register(&smc_sock_family_ops);
3441     if (rc) {
3442         pr_err("%s: sock_register fails with %d\n", __func__, rc);
3443         goto out_proto6;
3444     }
3445     INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
3446     INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
3447 
3448     rc = smc_ib_register_client();
3449     if (rc) {
3450         pr_err("%s: ib_register fails with %d\n", __func__, rc);
3451         goto out_sock;
3452     }
3453 
3454     rc = tcp_register_ulp(&smc_ulp_ops);
3455     if (rc) {
3456         pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
3457         goto out_ib;
3458     }
3459 
3460     static_branch_enable(&tcp_have_smc);
3461     return 0;
3462 
3463 out_ib:
3464     smc_ib_unregister_client();
3465 out_sock:
3466     sock_unregister(PF_SMC);
3467 out_proto6:
3468     proto_unregister(&smc_proto6);
3469 out_proto:
3470     proto_unregister(&smc_proto);
3471 out_core:
3472     smc_core_exit();
3473 out_alloc_wqs:
3474     destroy_workqueue(smc_close_wq);
3475 out_alloc_hs_wq:
3476     destroy_workqueue(smc_hs_wq);
3477 out_alloc_tcp_ls_wq:
3478     destroy_workqueue(smc_tcp_ls_wq);
3479 out_pnet:
3480     smc_pnet_exit();
3481 out_nl:
3482     smc_nl_exit();
3483 out_pernet_subsys:
3484     unregister_pernet_subsys(&smc_net_ops);
3485 
3486     return rc;
3487 }
3488 
/* Module exit: disables the tcp_have_smc static branch, then unregisters the
 * TCP ULP, the PF_SMC socket family, the IB client, both protos and both
 * pernet operations, destroys the three workqueues and calls the exit
 * routines of the core, pnet, netlink and clc parts. The concluding
 * rcu_barrier() waits for RCU callbacks that are still pending.
 * NOTE(review): the order of the calls is presumably significant - keep it
 * unless the dependencies have been checked.
 */
3489 static void __exit smc_exit(void)
3490 {
3491     static_branch_disable(&tcp_have_smc);
3492     tcp_unregister_ulp(&smc_ulp_ops);
3493     sock_unregister(PF_SMC);
3494     smc_core_exit();
3495     smc_ib_unregister_client();
3496     destroy_workqueue(smc_close_wq);
3497     destroy_workqueue(smc_tcp_ls_wq);
3498     destroy_workqueue(smc_hs_wq);
3499     proto_unregister(&smc_proto6);
3500     proto_unregister(&smc_proto);
3501     smc_pnet_exit();
3502     smc_nl_exit();
3503     smc_clc_exit();
3504     unregister_pernet_subsys(&smc_net_stat_ops);
3505     unregister_pernet_subsys(&smc_net_ops);
3506     rcu_barrier();
3507 }
3508 
/* Module entry and exit points. */
3509 module_init(smc_init);
3510 module_exit(smc_exit);
3511 
/* Module description and the aliases for the PF_SMC protocol family, the
 * "smc" TCP ULP and the SMC generic netlink family.
 */
3512 MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
3513 MODULE_DESCRIPTION("smc socket address family");
3514 MODULE_LICENSE("GPL");
3515 MODULE_ALIAS_NETPROTO(PF_SMC);
3516 MODULE_ALIAS_TCP_ULP("smc");
3517 MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);