0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033 #include <linux/kernel.h>
0034 #include <linux/gfp.h>
0035 #include <linux/in.h>
0036 #include <net/tcp.h>
0037
0038 #include "rds.h"
0039 #include "tcp.h"
0040
0041 void rds_tcp_keepalive(struct socket *sock)
0042 {
0043
0044 int keepidle = 5;
0045 int keepcnt = 5;
0046
0047 sock_set_keepalive(sock->sk);
0048 tcp_sock_set_keepcnt(sock->sk, keepcnt);
0049 tcp_sock_set_keepidle(sock->sk, keepidle);
0050
0051
0052
0053 tcp_sock_set_keepintvl(sock->sk, keepidle);
0054 }
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065 static
0066 struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
0067 {
0068 int i;
0069 int npaths = max_t(int, 1, conn->c_npaths);
0070
0071
0072
0073
0074 if (rds_addr_cmp(&conn->c_faddr, &conn->c_laddr) >= 0) {
0075
0076
0077
0078
0079 if (npaths == 1)
0080 rds_conn_path_connect_if_down(&conn->c_path[0]);
0081 return NULL;
0082 }
0083
0084 for (i = 0; i < npaths; i++) {
0085 struct rds_conn_path *cp = &conn->c_path[i];
0086
0087 if (rds_conn_path_transition(cp, RDS_CONN_DOWN,
0088 RDS_CONN_CONNECTING) ||
0089 rds_conn_path_transition(cp, RDS_CONN_ERROR,
0090 RDS_CONN_CONNECTING)) {
0091 return cp->cp_transport_data;
0092 }
0093 }
0094 return NULL;
0095 }
0096
0097 int rds_tcp_accept_one(struct socket *sock)
0098 {
0099 struct socket *new_sock = NULL;
0100 struct rds_connection *conn;
0101 int ret;
0102 struct inet_sock *inet;
0103 struct rds_tcp_connection *rs_tcp = NULL;
0104 int conn_state;
0105 struct rds_conn_path *cp;
0106 struct in6_addr *my_addr, *peer_addr;
0107 #if !IS_ENABLED(CONFIG_IPV6)
0108 struct in6_addr saddr, daddr;
0109 #endif
0110 int dev_if = 0;
0111
0112 if (!sock)
0113 return -ENETUNREACH;
0114
0115 ret = sock_create_lite(sock->sk->sk_family,
0116 sock->sk->sk_type, sock->sk->sk_protocol,
0117 &new_sock);
0118 if (ret)
0119 goto out;
0120
0121 ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
0122 if (ret < 0)
0123 goto out;
0124
0125
0126
0127
0128
0129
0130
0131
0132 new_sock->ops = sock->ops;
0133 __module_get(new_sock->ops->owner);
0134
0135 rds_tcp_keepalive(new_sock);
0136 if (!rds_tcp_tune(new_sock)) {
0137 ret = -EINVAL;
0138 goto out;
0139 }
0140
0141 inet = inet_sk(new_sock->sk);
0142
0143 #if IS_ENABLED(CONFIG_IPV6)
0144 my_addr = &new_sock->sk->sk_v6_rcv_saddr;
0145 peer_addr = &new_sock->sk->sk_v6_daddr;
0146 #else
0147 ipv6_addr_set_v4mapped(inet->inet_saddr, &saddr);
0148 ipv6_addr_set_v4mapped(inet->inet_daddr, &daddr);
0149 my_addr = &saddr;
0150 peer_addr = &daddr;
0151 #endif
0152 rdsdebug("accepted family %d tcp %pI6c:%u -> %pI6c:%u\n",
0153 sock->sk->sk_family,
0154 my_addr, ntohs(inet->inet_sport),
0155 peer_addr, ntohs(inet->inet_dport));
0156
0157 #if IS_ENABLED(CONFIG_IPV6)
0158
0159
0160
0161
0162 if ((ipv6_addr_type(my_addr) & IPV6_ADDR_LINKLOCAL) &&
0163 !(ipv6_addr_type(peer_addr) & IPV6_ADDR_LINKLOCAL)) {
0164 struct ipv6_pinfo *inet6;
0165
0166 inet6 = inet6_sk(new_sock->sk);
0167 dev_if = inet6->mcast_oif;
0168 } else {
0169 dev_if = new_sock->sk->sk_bound_dev_if;
0170 }
0171 #endif
0172
0173 if (!rds_tcp_laddr_check(sock_net(sock->sk), peer_addr, dev_if)) {
0174
0175 ret = -EOPNOTSUPP;
0176 goto out;
0177 }
0178
0179 conn = rds_conn_create(sock_net(sock->sk),
0180 my_addr, peer_addr,
0181 &rds_tcp_transport, 0, GFP_KERNEL, dev_if);
0182
0183 if (IS_ERR(conn)) {
0184 ret = PTR_ERR(conn);
0185 goto out;
0186 }
0187
0188
0189
0190
0191
0192 rs_tcp = rds_tcp_accept_one_path(conn);
0193 if (!rs_tcp)
0194 goto rst_nsk;
0195 mutex_lock(&rs_tcp->t_conn_path_lock);
0196 cp = rs_tcp->t_cpath;
0197 conn_state = rds_conn_path_state(cp);
0198 WARN_ON(conn_state == RDS_CONN_UP);
0199 if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_ERROR)
0200 goto rst_nsk;
0201 if (rs_tcp->t_sock) {
0202
0203 rds_tcp_reset_callbacks(new_sock, cp);
0204
0205 rds_connect_path_complete(cp, RDS_CONN_RESETTING);
0206 } else {
0207 rds_tcp_set_callbacks(new_sock, cp);
0208 rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
0209 }
0210 new_sock = NULL;
0211 ret = 0;
0212 if (conn->c_npaths == 0)
0213 rds_send_ping(cp->cp_conn, cp->cp_index);
0214 goto out;
0215 rst_nsk:
0216
0217
0218
0219
0220
0221
0222 sock_no_linger(new_sock->sk);
0223 kernel_sock_shutdown(new_sock, SHUT_RDWR);
0224 ret = 0;
0225 out:
0226 if (rs_tcp)
0227 mutex_unlock(&rs_tcp->t_conn_path_lock);
0228 if (new_sock)
0229 sock_release(new_sock);
0230 return ret;
0231 }
0232
0233 void rds_tcp_listen_data_ready(struct sock *sk)
0234 {
0235 void (*ready)(struct sock *sk);
0236
0237 rdsdebug("listen data ready sk %p\n", sk);
0238
0239 read_lock_bh(&sk->sk_callback_lock);
0240 ready = sk->sk_user_data;
0241 if (!ready) {
0242 ready = sk->sk_data_ready;
0243 goto out;
0244 }
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255 if (sk->sk_state == TCP_LISTEN)
0256 rds_tcp_accept_work(sk);
0257 else
0258 ready = rds_tcp_listen_sock_def_readable(sock_net(sk));
0259
0260 out:
0261 read_unlock_bh(&sk->sk_callback_lock);
0262 if (ready)
0263 ready(sk);
0264 }
0265
0266 struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
0267 {
0268 struct socket *sock = NULL;
0269 struct sockaddr_storage ss;
0270 struct sockaddr_in6 *sin6;
0271 struct sockaddr_in *sin;
0272 int addr_len;
0273 int ret;
0274
0275 ret = sock_create_kern(net, isv6 ? PF_INET6 : PF_INET, SOCK_STREAM,
0276 IPPROTO_TCP, &sock);
0277 if (ret < 0) {
0278 rdsdebug("could not create %s listener socket: %d\n",
0279 isv6 ? "IPv6" : "IPv4", ret);
0280 goto out;
0281 }
0282
0283 sock->sk->sk_reuse = SK_CAN_REUSE;
0284 tcp_sock_set_nodelay(sock->sk);
0285
0286 write_lock_bh(&sock->sk->sk_callback_lock);
0287 sock->sk->sk_user_data = sock->sk->sk_data_ready;
0288 sock->sk->sk_data_ready = rds_tcp_listen_data_ready;
0289 write_unlock_bh(&sock->sk->sk_callback_lock);
0290
0291 if (isv6) {
0292 sin6 = (struct sockaddr_in6 *)&ss;
0293 sin6->sin6_family = PF_INET6;
0294 sin6->sin6_addr = in6addr_any;
0295 sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
0296 sin6->sin6_scope_id = 0;
0297 sin6->sin6_flowinfo = 0;
0298 addr_len = sizeof(*sin6);
0299 } else {
0300 sin = (struct sockaddr_in *)&ss;
0301 sin->sin_family = PF_INET;
0302 sin->sin_addr.s_addr = INADDR_ANY;
0303 sin->sin_port = (__force u16)htons(RDS_TCP_PORT);
0304 addr_len = sizeof(*sin);
0305 }
0306
0307 ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len);
0308 if (ret < 0) {
0309 rdsdebug("could not bind %s listener socket: %d\n",
0310 isv6 ? "IPv6" : "IPv4", ret);
0311 goto out;
0312 }
0313
0314 ret = sock->ops->listen(sock, 64);
0315 if (ret < 0)
0316 goto out;
0317
0318 return sock;
0319 out:
0320 if (sock)
0321 sock_release(sock);
0322 return NULL;
0323 }
0324
0325 void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor)
0326 {
0327 struct sock *sk;
0328
0329 if (!sock)
0330 return;
0331
0332 sk = sock->sk;
0333
0334
0335 lock_sock(sk);
0336 write_lock_bh(&sk->sk_callback_lock);
0337 if (sk->sk_user_data) {
0338 sk->sk_data_ready = sk->sk_user_data;
0339 sk->sk_user_data = NULL;
0340 }
0341 write_unlock_bh(&sk->sk_callback_lock);
0342 release_sock(sk);
0343
0344
0345 flush_workqueue(rds_wq);
0346 flush_work(acceptor);
0347 sock_release(sock);
0348 }