Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
0004  *
0005  *  Socket Closing - normal and abnormal
0006  *
0007  *  Copyright IBM Corp. 2016
0008  *
0009  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
0010  */
0011 
0012 #include <linux/workqueue.h>
0013 #include <linux/sched/signal.h>
0014 
0015 #include <net/sock.h>
0016 #include <net/tcp.h>
0017 
0018 #include "smc.h"
0019 #include "smc_tx.h"
0020 #include "smc_cdc.h"
0021 #include "smc_close.h"
0022 
0023 /* release the clcsock that is assigned to the smc_sock */
/* release the clcsock that is assigned to the smc_sock */
void smc_clcsock_release(struct smc_sock *smc)
{
	struct socket *tcp;

	/* the listen worker may itself end up here; only cancel it
	 * synchronously when we are NOT running from smc_listen_work,
	 * otherwise cancel_work_sync() would wait for ourselves
	 */
	if (smc->listen_smc && current_work() != &smc->smc_listen_work)
		cancel_work_sync(&smc->smc_listen_work);
	mutex_lock(&smc->clcsock_release_lock);
	if (smc->clcsock) {
		tcp = smc->clcsock;
		/* clear the pointer under clcsock_release_lock so that
		 * concurrent users see NULL instead of a released socket
		 */
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	mutex_unlock(&smc->clcsock_release_lock);
}
0038 
0039 static void smc_close_cleanup_listen(struct sock *parent)
0040 {
0041     struct sock *sk;
0042 
0043     /* Close non-accepted connections */
0044     while ((sk = smc_accept_dequeue(parent, NULL)))
0045         smc_close_non_accepted(sk);
0046 }
0047 
/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	/* nothing staged for transmission -> no need to wait */
	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	/* Send out corked data remaining in sndbuf */
	smc_tx_pending(&smc->conn);

	/* flag set before sleeping so smc_close_wake_tx_prepared() can
	 * wake us via sk_state_change
	 */
	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		/* stop waiting once all prepared sends left the sndbuf,
		 * the connection failed (abort/reset), or it was killed
		 */
		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   sk->sk_err == ECONNABORTED ||
				   sk->sk_err == ECONNRESET ||
				   smc->conn.killed,
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}
0080 
0081 void smc_close_wake_tx_prepared(struct smc_sock *smc)
0082 {
0083     if (smc->wait_close_tx_prepared)
0084         /* wake up socket closing */
0085         smc->sk.sk_state_change(&smc->sk);
0086 }
0087 
0088 static int smc_close_wr(struct smc_connection *conn)
0089 {
0090     conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;
0091 
0092     return smc_cdc_get_slot_and_msg_send(conn);
0093 }
0094 
0095 static int smc_close_final(struct smc_connection *conn)
0096 {
0097     if (atomic_read(&conn->bytes_to_rcv))
0098         conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
0099     else
0100         conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
0101     if (conn->killed)
0102         return -EPIPE;
0103 
0104     return smc_cdc_get_slot_and_msg_send(conn);
0105 }
0106 
0107 int smc_close_abort(struct smc_connection *conn)
0108 {
0109     conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
0110 
0111     return smc_cdc_get_slot_and_msg_send(conn);
0112 }
0113 
/* synchronously cancel the connection's close and tx workers
 * Caller holds the sock lock.  It is dropped around the cancels because
 * close_work (smc_close_passive_work()) itself takes the sock lock, so
 * cancelling synchronously while holding it could deadlock; the same is
 * presumably true for tx_work -- TODO confirm against smc_tx.c.
 * Note: the socket state may change while the lock is dropped; callers
 * re-check sk_state after this returns.
 */
static void smc_close_cancel_work(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;

	release_sock(sk);
	cancel_work_sync(&smc->conn.close_work);
	cancel_delayed_work_sync(&smc->conn.tx_work);
	lock_sock(sk);
}
0123 
/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 * Caller must hold the sock lock (temporarily dropped for the final
 * clcsock release).
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	bool release_clcsock = false;

	/* propagate the failure to the internal TCP socket as well */
	if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
		sk->sk_err = ECONNABORTED;
		if (smc->clcsock && smc->clcsock->sk)
			tcp_abort(smc->clcsock->sk, ECONNABORTED);
	}
	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		sk->sk_state = SMC_PEERABORTWAIT;
		smc_close_cancel_work(smc);
		/* smc_close_cancel_work() dropped the sock lock; bail out
		 * if someone changed the state meanwhile
		 */
		if (sk->sk_state != SMC_PEERABORTWAIT)
			break;
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* (postponed) passive closing */
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_PEERABORTWAIT;
		smc_close_cancel_work(smc);
		if (sk->sk_state != SMC_PEERABORTWAIT)
			break;
		sk->sk_state = SMC_CLOSED;
		smc_conn_free(&smc->conn);
		release_clcsock = true;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PEERABORTWAIT;
		smc_close_cancel_work(smc);
		if (sk->sk_state != SMC_PEERABORTWAIT)
			break;
		sk->sk_state = SMC_CLOSED;
		smc_conn_free(&smc->conn);
		release_clcsock = true;
		break;
	case SMC_INIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* already aborting/closed: nothing further to do */
		break;
	}

	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);

	/* release the clcsock without holding the sock lock */
	if (release_clcsock) {
		release_sock(sk);
		smc_clcsock_release(smc);
		lock_sock(sk);
	}
}
0185 
0186 static inline bool smc_close_sent_any_close(struct smc_connection *conn)
0187 {
0188     return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
0189            conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
0190 }
0191 
/* drive the close state machine for an actively (locally) closed socket
 * Caller must hold the sock lock; it is dropped temporarily while
 * workers are flushed/cancelled, so each such section re-checks
 * sk_state afterwards.
 * Returns 0 or a negative error code from sending the close message.
 */
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;
	int rc1 = 0;

	/* exiting tasks do not wait at all; otherwise honor SO_LINGER,
	 * falling back to the default maximum stream-wait timeout
	 */
	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			/* detach our callbacks from the internal TCP
			 * socket before shutting it down
			 */
			write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
			smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
					       &smc->clcsk_data_ready);
			smc->clcsock->sk->sk_user_data = NULL;
			write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		smc_close_cleanup_listen(sk);
		/* drop the sock lock while waiting for the listen worker */
		release_sock(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;

			/* actively shutdown clcsock before peer close it,
			 * prevent peer from entering TIME_WAIT state.
			 */
			if (smc->clcsock && smc->clcsock->sk) {
				rc1 = kernel_sock_shutdown(smc->clcsock,
							   SHUT_RDWR);
				rc = rc ? rc : rc1;
			}
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* re-check: state may have changed while unlocked */
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (smc_cdc_rxed_any_close(conn)) {
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* postponed passive closing */
		} else {
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		}
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		rc = smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
0310 
/* an abort indication was received from the peer; move the socket to
 * the matching local state
 * Caller must hold the sock lock.  The sock_put() calls balance the
 * reference held for a pending passive close.
 */
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
		sk->sk_state = SMC_PROCESSABORT;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn))
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_PROCESSABORT;
		else
			sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PROCESSABORT:
	/* nothing to do, add tracing in future patch */
		break;
	}
}
0350 
/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing
 * or the link group of the connection terminates abnormally.
 * Runs as conn->close_work; the scheduler of the work holds a sock
 * reference that is dropped at the end.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	bool release_clcsock = false;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		/* drop the sock lock for the synchronous cancel; see the
		 * same pattern in smc_close_cancel_work()
		 */
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		fallthrough;
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn)) {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			/* fully closed and no longer referenced by user
			 * space: free the connection; the clcsock is
			 * released later, without the sock lock
			 */
			smc_conn_free(conn);
			if (smc->clcsock)
				release_clcsock = true;
		}
	}
	release_sock(sk);
	if (release_clcsock)
		smc_clcsock_release(smc);
	sock_put(sk); /* sock_hold done by schedulers of close_work */
}
0445 
/* shut down only the write side of the connection
 * Caller must hold the sock lock; it is released temporarily while
 * pending tx work is cancelled, so sk_state is re-checked afterwards.
 * Returns 0 or a negative error code from sending the CDC message.
 */
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	/* exiting tasks do not wait at all; otherwise honor SO_LINGER,
	 * falling back to the default maximum stream-wait timeout
	 */
	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* state may have changed while the lock was dropped */
		if (sk->sk_state != SMC_ACTIVE)
			goto again;
		/* send close wr request */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_PEERCLOSEWAIT1;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1)
			goto again;
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
0500 
/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	/* close_work handles close indications received from the peer */
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}