// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2019, Intel Corporation.
 */
#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"

#include "mib.h"

/* path manager command handlers */

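/* Flag an ADD_ADDR (or its echo) for transmission: store the address in the
 * PM context and set the matching bit in pm.addr_signal so the option
 * writing code picks it up on the next outgoing packet. Returns -EINVAL if
 * the same kind of signal is already pending.
 */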
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
               const struct mptcp_addr_info *addr,
               bool echo)
{
    u8 add_addr = READ_ONCE(msk->pm.addr_signal);

    pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo);

    lockdep_assert_held(&msk->pm.lock);

    if (add_addr &
        (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
        pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo);
        return -EINVAL;
    }

    if (echo) {
        msk->pm.remote = *addr;
        add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
    } else {
        msk->pm.local = *addr;
        add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
    }
    WRITE_ONCE(msk->pm.addr_signal, add_addr);
    return 0;
}

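/* Queue an RM_ADDR for the ids in @rm_list and kick an ack so the option is
 * actually sent. Refuses the request if any address signal is still pending.
 */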
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
    u8 rm_addr = READ_ONCE(msk->pm.addr_signal);

    pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);

    if (rm_addr) {
        pr_warn("addr_signal error, rm_addr=%d", rm_addr);
        return -EINVAL;
    }

    msk->pm.rm_list_tx = *rm_list;
    rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
    WRITE_ONCE(msk->pm.addr_signal, rm_addr);
    mptcp_pm_nl_addr_send_ack(msk);
    return 0;
}

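/* Hand the list of subflow ids to remove over to the in-kernel (netlink) path
 * manager, under the PM lock.
 */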
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
    pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);

    spin_lock_bh(&msk->pm.lock);
    mptcp_pm_nl_rm_subflow_received(msk, rm_list);
    spin_unlock_bh(&msk->pm.lock);
    return 0;
}

/* path manager event handlers */

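/* A new MPTCP connection has been created: remember which side we are on and
 * notify userspace via an MPTCP_EVENT_CREATED event.
 */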
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
    struct mptcp_pm_data *pm = &msk->pm;

    pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);

    WRITE_ONCE(pm->server_side, server_side);
    mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
}

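/* Decide whether an incoming MP_JOIN may create a new subflow: a userspace
 * path manager answers via mptcp_userspace_pm_active(), while the in-kernel
 * PM enforces the configured subflow limit and clears accept_subflow once
 * that limit is reached.
 */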
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
{
    struct mptcp_pm_data *pm = &msk->pm;
    unsigned int subflows_max;
    int ret = 0;

    if (mptcp_pm_is_userspace(msk))
        return mptcp_userspace_pm_active(msk);

    subflows_max = mptcp_pm_get_subflows_max(msk);

    pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
         subflows_max, READ_ONCE(pm->accept_subflow));

    /* try to avoid acquiring the lock below */
    if (!READ_ONCE(pm->accept_subflow))
        return false;

    spin_lock_bh(&pm->lock);
    if (READ_ONCE(pm->accept_subflow)) {
        ret = pm->subflows < subflows_max;
        if (ret && ++pm->subflows == subflows_max)
            WRITE_ONCE(pm->accept_subflow, false);
    }
    spin_unlock_bh(&pm->lock);

    return ret;
}

/* return true if the new status bit is currently cleared, that is, this event
 * can be served, eventually by an already scheduled work
 */
static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
                   enum mptcp_pm_status new_status)
{
    pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status,
         BIT(new_status));
    if (msk->pm.status & BIT(new_status))
        return false;

    msk->pm.status |= BIT(new_status);
    mptcp_schedule_work((struct sock *)msk);
    return true;
}

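/* The MPTCP connection is now fully established. Schedule the PM worker if it
 * still has pending work, and make sure the MPTCP_EVENT_ESTABLISHED event is
 * emitted only once even when racing callers invoke this concurrently.
 */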
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
{
    struct mptcp_pm_data *pm = &msk->pm;
    bool announce = false;

    pr_debug("msk=%p", msk);

    spin_lock_bh(&pm->lock);

    /* mptcp_pm_fully_established() can be invoked by multiple
     * racing paths - accept() and check_fully_established();
     * be sure to serve this event only once.
     */
    if (READ_ONCE(pm->work_pending) &&
        !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
        mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);

    if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
        announce = true;

    msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
    spin_unlock_bh(&pm->lock);

    if (announce)
        mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
}

void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
    pr_debug("msk=%p", msk);
}

void mptcp_pm_subflow_established(struct mptcp_sock *msk)
{
    struct mptcp_pm_data *pm = &msk->pm;

    pr_debug("msk=%p", msk);

    if (!READ_ONCE(pm->work_pending))
        return;

    spin_lock_bh(&pm->lock);

    if (READ_ONCE(pm->work_pending))
        mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

    spin_unlock_bh(&pm->lock);
}

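/* A subflow is going away: let the in-kernel PM update its subflow accounting
 * and, if more work is pending, schedule the worker so it can try to create
 * the next subflows.
 */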
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
                 const struct mptcp_subflow_context *subflow)
{
    struct mptcp_pm_data *pm = &msk->pm;
    bool update_subflows;

    update_subflows = (subflow->request_join || subflow->mp_join) &&
              mptcp_pm_is_kernel(msk);
    if (!READ_ONCE(pm->work_pending) && !update_subflows)
        return;

    spin_lock_bh(&pm->lock);
    if (update_subflows)
        __mptcp_pm_close_subflow(msk);

    /* Even if this subflow is not really established, tell the PM to try
     * to pick the next ones, if possible.
     */
    if (mptcp_pm_nl_check_work_pending(msk))
        mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

    spin_unlock_bh(&pm->lock);
}

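/* Handle an ADD_ADDR option received from the peer: always report it to
 * userspace, then either echo it back right away (active userspace PM, or
 * accept_addr disabled), hand it to the worker so a new subflow can be
 * created, or account it as dropped.
 */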
void mptcp_pm_add_addr_received(const struct sock *ssk,
                const struct mptcp_addr_info *addr)
{
    struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
    struct mptcp_sock *msk = mptcp_sk(subflow->conn);
    struct mptcp_pm_data *pm = &msk->pm;

    pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
         READ_ONCE(pm->accept_addr));

    mptcp_event_addr_announced(ssk, addr);

    spin_lock_bh(&pm->lock);

    if (mptcp_pm_is_userspace(msk)) {
        if (mptcp_userspace_pm_active(msk)) {
            mptcp_pm_announce_addr(msk, addr, true);
            mptcp_pm_add_addr_send_ack(msk);
        } else {
            __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
        }
    } else if (!READ_ONCE(pm->accept_addr)) {
        mptcp_pm_announce_addr(msk, addr, true);
        mptcp_pm_add_addr_send_ack(msk);
    } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
        pm->remote = *addr;
    } else {
        __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
    }

    spin_unlock_bh(&pm->lock);
}

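/* The peer echoed one of our announced addresses: if it matches a pending
 * entry in the announce list, schedule the worker so the PM can make further
 * progress.
 */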
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
                  const struct mptcp_addr_info *addr)
{
    struct mptcp_pm_data *pm = &msk->pm;

    pr_debug("msk=%p", msk);

    spin_lock_bh(&pm->lock);

    if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending))
        mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

    spin_unlock_bh(&pm->lock);
}

void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
{
    if (!mptcp_pm_should_add_signal(msk))
        return;

    mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
}

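/* Handle an RM_ADDR option from the peer: emit a removal event for each id
 * and pass the list to the worker; if a previous removal is still being
 * processed, account the drop in the MIB counters.
 */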
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
                   const struct mptcp_rm_list *rm_list)
{
    struct mptcp_pm_data *pm = &msk->pm;
    u8 i;

    pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr);

    for (i = 0; i < rm_list->nr; i++)
        mptcp_event_addr_removed(msk, rm_list->ids[i]);

    spin_lock_bh(&pm->lock);
    if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED))
        pm->rm_list_rx = *rm_list;
    else
        __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP);
    spin_unlock_bh(&pm->lock);
}

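/* The peer changed the backup priority of this subflow via MP_PRIO: record
 * the new flag, invalidate the scheduler's last-sent cache (directly, or via
 * the release callback when the socket is owned by user context) and emit a
 * SUB_PRIORITY event.
 */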
void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
{
    struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
    struct sock *sk = subflow->conn;
    struct mptcp_sock *msk;

    pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
    msk = mptcp_sk(sk);
    if (subflow->backup != bkup) {
        subflow->backup = bkup;
        mptcp_data_lock(sk);
        if (!sock_owned_by_user(sk))
            msk->last_snd = NULL;
        else
            __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags);
        mptcp_data_unlock(sk);
    }

    mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}

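/* Handle an incoming MP_FAIL while infinite fallback is still allowed: either
 * answer with an MP_FAIL of our own plus an infinite mapping, or, if we were
 * already waiting for the peer's MP_FAIL response, clear the fail timeout.
 */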
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
    struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
    struct mptcp_sock *msk = mptcp_sk(subflow->conn);

    pr_debug("fail_seq=%llu", fail_seq);

    if (!READ_ONCE(msk->allow_infinite_fallback))
        return;

    if (!subflow->fail_tout) {
        pr_debug("send MP_FAIL response and infinite map");

        subflow->send_mp_fail = 1;
        subflow->send_infinite_map = 1;
        tcp_send_ack(sk);
    } else {
        pr_debug("MP_FAIL response received");
        WRITE_ONCE(subflow->fail_tout, 0);
    }
}

/* path manager helpers */

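/* Called from the option-writing path: if an ADD_ADDR (or its echo) is
 * pending and fits in the remaining option space, copy the address out,
 * clear the pending bit and tell the caller whether the other suboptions
 * should be dropped (pure-ack case).
 */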
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
                  unsigned int opt_size, unsigned int remaining,
                  struct mptcp_addr_info *addr, bool *echo,
                  bool *drop_other_suboptions)
{
    int ret = false;
    u8 add_addr;
    u8 family;
    bool port;

    spin_lock_bh(&msk->pm.lock);

    /* double check after the lock is acquired */
    if (!mptcp_pm_should_add_signal(msk))
        goto out_unlock;

    /* always drop all other options for a pure-ack ADD_ADDR; this is a
     * plain dup-ack from the TCP perspective. The other MPTCP-relevant
     * info, if any, will be carried by the 'original' TCP ack
     */
    if (skb && skb_is_tcp_pure_ack(skb)) {
        remaining += opt_size;
        *drop_other_suboptions = true;
    }

    *echo = mptcp_pm_should_add_signal_echo(msk);
    port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);

    family = *echo ? msk->pm.remote.family : msk->pm.local.family;
    if (remaining < mptcp_add_addr_len(family, *echo, port))
        goto out_unlock;

    if (*echo) {
        *addr = msk->pm.remote;
        add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
    } else {
        *addr = msk->pm.local;
        add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
    }
    WRITE_ONCE(msk->pm.addr_signal, add_addr);
    ret = true;

out_unlock:
    spin_unlock_bh(&msk->pm.lock);
    return ret;
}

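/* Same as above for RM_ADDR: if a removal is pending and the id list fits in
 * the remaining option space, copy it out and clear the pending bit.
 */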
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
                 struct mptcp_rm_list *rm_list)
{
    int ret = false, len;
    u8 rm_addr;

    spin_lock_bh(&msk->pm.lock);

    /* double check after the lock is acquired */
    if (!mptcp_pm_should_rm_signal(msk))
        goto out_unlock;

    rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL);
    len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
    if (len < 0) {
        WRITE_ONCE(msk->pm.addr_signal, rm_addr);
        goto out_unlock;
    }
    if (remaining < len)
        goto out_unlock;

    *rm_list = msk->pm.rm_list_tx;
    WRITE_ONCE(msk->pm.addr_signal, rm_addr);
    ret = true;

out_unlock:
    spin_unlock_bh(&msk->pm.lock);
    return ret;
}

int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
    return mptcp_pm_nl_get_local_id(msk, skc);
}

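/* Track consecutive retransmission periods with no forward progress on this
 * subflow: after the first stalled period let the in-kernel PM consider
 * marking the subflow stale; any progress resets the counter and marks the
 * subflow active again.
 */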
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
    struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
    u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);

    /* keep track of rtx periods with no progress */
    if (!subflow->stale_count) {
        subflow->stale_rcv_tstamp = rcv_tstamp;
        subflow->stale_count++;
    } else if (subflow->stale_rcv_tstamp == rcv_tstamp) {
        if (subflow->stale_count < U8_MAX)
            subflow->stale_count++;
        mptcp_pm_nl_subflow_chk_stale(msk, ssk);
    } else {
        subflow->stale_count = 0;
        mptcp_subflow_set_active(subflow);
    }
}

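/* Reset all path manager state for this socket: clear the counters and
 * pending signals, derive work_pending/accept_addr/accept_subflow from the
 * per-netns limits when the in-kernel PM is in use, and mark every address
 * id as available again.
 */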
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
    u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
    struct mptcp_pm_data *pm = &msk->pm;

    pm->add_addr_signaled = 0;
    pm->add_addr_accepted = 0;
    pm->local_addr_used = 0;
    pm->subflows = 0;
    pm->rm_list_tx.nr = 0;
    pm->rm_list_rx.nr = 0;
    WRITE_ONCE(pm->pm_type, pm_type);

    if (pm_type == MPTCP_PM_TYPE_KERNEL) {
        bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk);

        /* pm->work_pending must only be set to 'true' when
         * pm->pm_type is set to MPTCP_PM_TYPE_KERNEL
         */
        WRITE_ONCE(pm->work_pending,
               (!!mptcp_pm_get_local_addr_max(msk) &&
                subflows_allowed) ||
               !!mptcp_pm_get_add_addr_signal_max(msk));
        WRITE_ONCE(pm->accept_addr,
               !!mptcp_pm_get_add_addr_accept_max(msk) &&
               subflows_allowed);
        WRITE_ONCE(pm->accept_subflow, subflows_allowed);
    } else {
        WRITE_ONCE(pm->work_pending, 0);
        WRITE_ONCE(pm->accept_addr, 0);
        WRITE_ONCE(pm->accept_subflow, 0);
    }

    WRITE_ONCE(pm->addr_signal, 0);
    WRITE_ONCE(pm->remote_deny_join_id0, false);
    pm->status = 0;
    bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
}

void mptcp_pm_data_init(struct mptcp_sock *msk)
{
    spin_lock_init(&msk->pm.lock);
    INIT_LIST_HEAD(&msk->pm.anno_list);
    INIT_LIST_HEAD(&msk->pm.userspace_pm_local_addr_list);
    mptcp_pm_data_reset(msk);
}

void __init mptcp_pm_init(void)
{
    mptcp_pm_nl_init();
}