// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP path-manager core.
 *
 * NOTE(review): original file header was lost in extraction (only bare line
 * numbers remained); SPDX tag and description reconstructed — confirm against
 * the upstream kernel source.
 */
0006 #define pr_fmt(fmt) "MPTCP: " fmt
0007
0008 #include <linux/kernel.h>
0009 #include <net/tcp.h>
0010 #include <net/mptcp.h>
0011 #include "protocol.h"
0012
0013 #include "mib.h"
0014
0015
0016
/* Queue an ADD_ADDR announcement (or its echo) for transmission on @msk.
 *
 * Caller must hold msk->pm.lock (asserted below).  The pending address is
 * stashed in pm.remote (echo) or pm.local (signal) and the matching bit is
 * set in pm.addr_signal; the option writer later consumes it via
 * mptcp_pm_add_addr_signal().
 *
 * Returns 0 on success, -EINVAL if an announcement of the same kind is
 * already pending.
 */
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
			   const struct mptcp_addr_info *addr,
			   bool echo)
{
	u8 add_addr = READ_ONCE(msk->pm.addr_signal);

	pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo);

	lockdep_assert_held(&msk->pm.lock);

	/* Only one signal and one echo may be in flight at a time. */
	if (add_addr &
	    (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
		pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo);
		return -EINVAL;
	}

	if (echo) {
		/* Echo back the peer's announced address. */
		msk->pm.remote = *addr;
		add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
	} else {
		/* Announce one of our own local addresses. */
		msk->pm.local = *addr;
		add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
	}
	/* Publish the updated bitmask for lockless readers. */
	WRITE_ONCE(msk->pm.addr_signal, add_addr);
	return 0;
}
0043
/* Queue an RM_ADDR for the ids in @rm_list and kick the ack worker.
 *
 * Fails with -EINVAL if any addr_signal bit is already set: RM_ADDR shares
 * the signalling word with ADD_ADDR and only one signal may be pending.
 * Returns 0 on success.
 */
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
	u8 rm_addr = READ_ONCE(msk->pm.addr_signal);

	pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);

	if (rm_addr) {
		pr_warn("addr_signal error, rm_addr=%d", rm_addr);
		return -EINVAL;
	}

	/* Stash the list for the option writer, then publish the bit. */
	msk->pm.rm_list_tx = *rm_list;
	rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
	WRITE_ONCE(msk->pm.addr_signal, rm_addr);
	mptcp_pm_nl_addr_send_ack(msk);
	return 0;
}
0061
/* Tear down the subflows identified by @rm_list, delegating to the
 * in-kernel PM handler under the pm lock.  Always returns 0.
 */
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
	pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);

	spin_lock_bh(&msk->pm.lock);
	mptcp_pm_nl_rm_subflow_received(msk, rm_list);
	spin_unlock_bh(&msk->pm.lock);
	return 0;
}
0071
0072
0073
/* Record which side of the connection we are on and emit the netlink
 * MPTCP_EVENT_CREATED event for the new connection carried by @ssk.
 */
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);

	WRITE_ONCE(pm->server_side, server_side);
	mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
}
0083
/* Decide whether an incoming MP_JOIN subflow may be accepted on @msk.
 *
 * For a userspace PM the decision is delegated entirely to
 * mptcp_userspace_pm_active().  Otherwise accept while the current subflow
 * count is below the configured maximum; when the count reaches the limit
 * accept_subflow is cleared so later callers bail out on the lockless
 * fast path below.
 */
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;
	unsigned int subflows_max;
	int ret = 0;

	if (mptcp_pm_is_userspace(msk))
		return mptcp_userspace_pm_active(msk);

	subflows_max = mptcp_pm_get_subflows_max(msk);

	pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
		 subflows_max, READ_ONCE(pm->accept_subflow));

	/* Lockless fast path: reject without taking the pm lock. */
	if (!READ_ONCE(pm->accept_subflow))
		return false;

	spin_lock_bh(&pm->lock);
	/* Re-check under the lock (double-checked pattern): another CPU may
	 * have consumed the last slot since the test above.
	 */
	if (READ_ONCE(pm->accept_subflow)) {
		ret = pm->subflows < subflows_max;
		if (ret && ++pm->subflows == subflows_max)
			WRITE_ONCE(pm->accept_subflow, false);
	}
	spin_unlock_bh(&pm->lock);

	return ret;
}
0112
0113
0114
0115
0116 static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
0117 enum mptcp_pm_status new_status)
0118 {
0119 pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status,
0120 BIT(new_status));
0121 if (msk->pm.status & BIT(new_status))
0122 return false;
0123
0124 msk->pm.status |= BIT(new_status);
0125 mptcp_schedule_work((struct sock *)msk);
0126 return true;
0127 }
0128
/* Handle the transition to the fully-established state for @msk.
 *
 * Schedules the PM worker (once) if path-manager work is pending, and
 * emits MPTCP_EVENT_ESTABLISHED exactly once per connection, gated by the
 * MPTCP_PM_ALREADY_ESTABLISHED status bit.  The event is sent outside the
 * pm lock since mptcp_event() may allocate with caller-provided @gfp.
 */
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
{
	struct mptcp_pm_data *pm = &msk->pm;
	bool announce = false;

	pr_debug("msk=%p", msk);

	spin_lock_bh(&pm->lock);

	/* Only kick the worker the first time we reach the established
	 * state; ALREADY_ESTABLISHED guards against repeats (e.g. more
	 * subflows becoming established later).
	 */
	if (READ_ONCE(pm->work_pending) &&
	    !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
		mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);

	if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
		announce = true;

	msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
	spin_unlock_bh(&pm->lock);

	/* Emit the netlink event outside the lock. */
	if (announce)
		mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
}
0155
/* PM hook invoked when the MPTCP connection is closed; currently only a
 * tracing stub, kept for symmetry with the other PM event hooks.
 */
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
	pr_debug("msk=%p", msk);
}
0160
/* PM hook invoked when a subflow becomes established: schedule the
 * SUBFLOW_ESTABLISHED worker if PM work is pending.  work_pending is
 * checked locklessly first as a fast path, then re-checked under the lock.
 */
void mptcp_pm_subflow_established(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p", msk);

	if (!READ_ONCE(pm->work_pending))
		return;

	spin_lock_bh(&pm->lock);

	if (READ_ONCE(pm->work_pending))
		mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

	spin_unlock_bh(&pm->lock);
}
0177
/* PM hook invoked when @subflow is being closed: update PM accounting and
 * possibly re-schedule subflow creation work.
 *
 * For the in-kernel PM, a closing join subflow (requested or accepted)
 * frees a slot, so the subflow counters are updated via
 * __mptcp_pm_close_subflow(); if the PM still has work to do, the worker
 * is scheduled to try establishing a replacement subflow.
 */
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
				 const struct mptcp_subflow_context *subflow)
{
	struct mptcp_pm_data *pm = &msk->pm;
	bool update_subflows;

	update_subflows = (subflow->request_join || subflow->mp_join) &&
			  mptcp_pm_is_kernel(msk);
	if (!READ_ONCE(pm->work_pending) && !update_subflows)
		return;

	spin_lock_bh(&pm->lock);
	if (update_subflows)
		__mptcp_pm_close_subflow(msk);

	/* Re-check the work-pending state under the lock via the nl helper
	 * before scheduling the worker.
	 */
	if (mptcp_pm_nl_check_work_pending(msk))
		mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

	spin_unlock_bh(&pm->lock);
}
0201
/* Handle an ADD_ADDR option received on subflow @ssk announcing @addr.
 *
 * Emits the netlink "address announced" event, then either:
 *  - userspace PM active: echo the address back immediately;
 *  - kernel PM with accept_addr disabled: still echo (peer expects an
 *    echo even when we won't use the address);
 *  - kernel PM accepting: stash the address in pm->remote and schedule
 *    the ADD_ADDR_RECEIVED worker;
 *  - otherwise: count the announcement as dropped (MIB ADDADDRDROP).
 */
void mptcp_pm_add_addr_received(const struct sock *ssk,
				const struct mptcp_addr_info *addr)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
		 READ_ONCE(pm->accept_addr));

	mptcp_event_addr_announced(ssk, addr);

	spin_lock_bh(&pm->lock);

	if (mptcp_pm_is_userspace(msk)) {
		if (mptcp_userspace_pm_active(msk)) {
			mptcp_pm_announce_addr(msk, addr, true);
			mptcp_pm_add_addr_send_ack(msk);
		} else {
			__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
		}
	} else if (!READ_ONCE(pm->accept_addr)) {
		mptcp_pm_announce_addr(msk, addr, true);
		mptcp_pm_add_addr_send_ack(msk);
	} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
		pm->remote = *addr;
	} else {
		__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
	}

	spin_unlock_bh(&pm->lock);
}
0234
/* Handle an ADD_ADDR echo for @addr: if the echoed address matches one we
 * announced (still on the anno_list) and PM work is pending, schedule the
 * SUBFLOW_ESTABLISHED worker to make progress on new subflows.
 */
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
			      const struct mptcp_addr_info *addr)
{
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p", msk);

	spin_lock_bh(&pm->lock);

	if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending))
		mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

	spin_unlock_bh(&pm->lock);
}
0249
0250 void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
0251 {
0252 if (!mptcp_pm_should_add_signal(msk))
0253 return;
0254
0255 mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
0256 }
0257
/* Handle a received RM_ADDR listing the peer addresses in @rm_list.
 *
 * Emits one "address removed" netlink event per id, then stashes the list
 * in pm->rm_list_rx and schedules the RM_ADDR_RECEIVED worker; if that
 * work was already pending the new list is dropped and accounted in the
 * RMADDRDROP MIB counter.
 */
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
			       const struct mptcp_rm_list *rm_list)
{
	struct mptcp_pm_data *pm = &msk->pm;
	u8 i;

	pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr);

	for (i = 0; i < rm_list->nr; i++)
		mptcp_event_addr_removed(msk, rm_list->ids[i]);

	spin_lock_bh(&pm->lock);
	if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED))
		pm->rm_list_rx = *rm_list;
	else
		__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP);
	spin_unlock_bh(&pm->lock);
}
0276
/* Handle a received MP_PRIO on @ssk requesting backup state @bkup.
 *
 * When the backup flag actually changes, the packet scheduler state is
 * reset: directly (last_snd = NULL) if the msk socket is not owned by
 * user context, otherwise deferred via the MPTCP_RESET_SCHEDULER cb flag.
 * Always emits the SUB_PRIORITY netlink event.
 */
void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;
	struct mptcp_sock *msk;

	pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
	msk = mptcp_sk(sk);
	if (subflow->backup != bkup) {
		subflow->backup = bkup;
		mptcp_data_lock(sk);
		if (!sock_owned_by_user(sk))
			msk->last_snd = NULL;
		else
			__set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags);
		mptcp_data_unlock(sk);
	}

	mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}
0297
/* Handle a received MP_FAIL on subflow @sk for sequence @fail_seq.
 *
 * Only relevant while infinite fallback is still allowed.  If no MP_FAIL
 * timeout is armed on this subflow, this is the peer initiating: respond
 * with our own MP_FAIL plus an infinite mapping (pushed out via an ack).
 * Otherwise this is the peer's response to our MP_FAIL: clear the armed
 * timeout.
 */
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	pr_debug("fail_seq=%llu", fail_seq);

	if (!READ_ONCE(msk->allow_infinite_fallback))
		return;

	if (!subflow->fail_tout) {
		pr_debug("send MP_FAIL response and infinite map");

		subflow->send_mp_fail = 1;
		subflow->send_infinite_map = 1;
		tcp_send_ack(sk);
	} else {
		pr_debug("MP_FAIL response received");
		WRITE_ONCE(subflow->fail_tout, 0);
	}
}
0319
0320
0321
0322 bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
0323 unsigned int opt_size, unsigned int remaining,
0324 struct mptcp_addr_info *addr, bool *echo,
0325 bool *drop_other_suboptions)
0326 {
0327 int ret = false;
0328 u8 add_addr;
0329 u8 family;
0330 bool port;
0331
0332 spin_lock_bh(&msk->pm.lock);
0333
0334
0335 if (!mptcp_pm_should_add_signal(msk))
0336 goto out_unlock;
0337
0338
0339
0340
0341
0342 if (skb && skb_is_tcp_pure_ack(skb)) {
0343 remaining += opt_size;
0344 *drop_other_suboptions = true;
0345 }
0346
0347 *echo = mptcp_pm_should_add_signal_echo(msk);
0348 port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
0349
0350 family = *echo ? msk->pm.remote.family : msk->pm.local.family;
0351 if (remaining < mptcp_add_addr_len(family, *echo, port))
0352 goto out_unlock;
0353
0354 if (*echo) {
0355 *addr = msk->pm.remote;
0356 add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
0357 } else {
0358 *addr = msk->pm.local;
0359 add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
0360 }
0361 WRITE_ONCE(msk->pm.addr_signal, add_addr);
0362 ret = true;
0363
0364 out_unlock:
0365 spin_unlock_bh(&msk->pm.lock);
0366 return ret;
0367 }
0368
0369 bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
0370 struct mptcp_rm_list *rm_list)
0371 {
0372 int ret = false, len;
0373 u8 rm_addr;
0374
0375 spin_lock_bh(&msk->pm.lock);
0376
0377
0378 if (!mptcp_pm_should_rm_signal(msk))
0379 goto out_unlock;
0380
0381 rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL);
0382 len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
0383 if (len < 0) {
0384 WRITE_ONCE(msk->pm.addr_signal, rm_addr);
0385 goto out_unlock;
0386 }
0387 if (remaining < len)
0388 goto out_unlock;
0389
0390 *rm_list = msk->pm.rm_list_tx;
0391 WRITE_ONCE(msk->pm.addr_signal, rm_addr);
0392 ret = true;
0393
0394 out_unlock:
0395 spin_unlock_bh(&msk->pm.lock);
0396 return ret;
0397 }
0398
/* Resolve the local address id for @skc; thin wrapper delegating to the
 * netlink/in-kernel PM implementation.
 */
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
	return mptcp_pm_nl_get_local_id(msk, skc);
}
0403
/* Staleness check for subflow @ssk, based on its TCP rcv_tstamp.
 *
 * On the first call the current timestamp is snapshotted; if later calls
 * see the same timestamp (no progress on the receive side) the stale
 * counter grows (saturating at U8_MAX) and the nl PM staleness handler is
 * invoked.  Any forward progress resets the counter and re-activates the
 * subflow.
 */
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);

	if (!subflow->stale_count) {
		subflow->stale_rcv_tstamp = rcv_tstamp;
		subflow->stale_count++;
	} else if (subflow->stale_rcv_tstamp == rcv_tstamp) {
		if (subflow->stale_count < U8_MAX)
			subflow->stale_count++;
		mptcp_pm_nl_subflow_chk_stale(msk, ssk);
	} else {
		subflow->stale_count = 0;
		mptcp_subflow_set_active(subflow);
	}
}
0422
/* Reset all per-connection PM state on @msk to its initial values.
 *
 * For the in-kernel PM the work_pending/accept_addr/accept_subflow flags
 * are derived from the netns PM limits; for userspace/other PM types they
 * are all cleared, as the decisions are taken elsewhere.
 */
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
	u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
	struct mptcp_pm_data *pm = &msk->pm;

	pm->add_addr_signaled = 0;
	pm->add_addr_accepted = 0;
	pm->local_addr_used = 0;
	pm->subflows = 0;
	pm->rm_list_tx.nr = 0;
	pm->rm_list_rx.nr = 0;
	WRITE_ONCE(pm->pm_type, pm_type);

	if (pm_type == MPTCP_PM_TYPE_KERNEL) {
		bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk);

		/* PM work is pending when we can either create new
		 * subflows (local addresses + subflow budget) or announce
		 * addresses.
		 */
		WRITE_ONCE(pm->work_pending,
			   (!!mptcp_pm_get_local_addr_max(msk) &&
			    subflows_allowed) ||
			   !!mptcp_pm_get_add_addr_signal_max(msk));
		WRITE_ONCE(pm->accept_addr,
			   !!mptcp_pm_get_add_addr_accept_max(msk) &&
			   subflows_allowed);
		WRITE_ONCE(pm->accept_subflow, subflows_allowed);
	} else {
		WRITE_ONCE(pm->work_pending, 0);
		WRITE_ONCE(pm->accept_addr, 0);
		WRITE_ONCE(pm->accept_subflow, 0);
	}

	WRITE_ONCE(pm->addr_signal, 0);
	WRITE_ONCE(pm->remote_deny_join_id0, false);
	pm->status = 0;
	/* All local address ids start out available. */
	bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
}
0461
0462 void mptcp_pm_data_init(struct mptcp_sock *msk)
0463 {
0464 spin_lock_init(&msk->pm.lock);
0465 INIT_LIST_HEAD(&msk->pm.anno_list);
0466 INIT_LIST_HEAD(&msk->pm.userspace_pm_local_addr_list);
0467 mptcp_pm_data_reset(msk);
0468 }
0469
/* Subsystem boot-time initialization: set up the netlink PM. */
void __init mptcp_pm_init(void)
{
	mptcp_pm_nl_init();
}