Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * INET     An implementation of the TCP/IP protocol suite for the LINUX
0004  *      operating system.  INET is implemented using the  BSD Socket
0005  *      interface as the means of communication with the user level.
0006  *
0007  *      Generic TIME_WAIT sockets functions
0008  *
0009  *      From code originally in TCP
0010  */
0011 
0012 #include <linux/kernel.h>
0013 #include <linux/slab.h>
0014 #include <linux/module.h>
0015 #include <net/inet_hashtables.h>
0016 #include <net/inet_timewait_sock.h>
0017 #include <net/ip.h>
0018 
0019 
/**
 *  inet_twsk_bind_unhash - unhash a timewait socket from bind hash
 *  @tw: timewait socket
 *  @hashinfo: hashinfo pointer
 *
 *  unhash a timewait socket from bind hash, if hashed.
 *  bind hash lock must be held by caller.
 *  NOTE(review): returns void — the old "Returns 1 if caller should call
 *  inet_twsk_put() after lock release" note was stale; the bind-hash
 *  reference is dropped here directly via __sock_put().
 */
void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
			   struct inet_hashinfo *hashinfo)
{
	struct inet_bind_bucket *tb = tw->tw_tb;

	/* Not linked into a bind bucket: nothing to undo. */
	if (!tb)
		return;

	__hlist_del(&tw->tw_bind_node);
	tw->tw_tb = NULL;
	/* Releases the bucket if this was its last owner. */
	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
	/* Drop the reference the bind-hash linkage held on the tw sock. */
	__sock_put((struct sock *)tw);
}
0042 
/* Must be called with locally disabled BHs. */
static void inet_twsk_kill(struct inet_timewait_sock *tw)
{
	struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
	spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
	struct inet_bind_hashbucket *bhead;

	/* Unlink from the established-hash nulls chain. */
	spin_lock(lock);
	sk_nulls_del_node_init_rcu((struct sock *)tw);
	spin_unlock(lock);

	/* Disassociate with bind bucket. */
	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
			hashinfo->bhash_size)];

	spin_lock(&bhead->lock);
	inet_twsk_bind_unhash(tw, hashinfo);
	spin_unlock(&bhead->lock);

	/* Last timewait socket of this death row also frees the row. */
	if (refcount_dec_and_test(&tw->tw_dr->tw_refcount))
		kfree(tw->tw_dr);

	/* Drop the timer's reference; this may free tw. */
	inet_twsk_put(tw);
}
0067 
/* Final release of a timewait socket: run the protocol destructor,
 * return the object to its slab cache and drop the owning module's
 * reference.  @owner is captured first because tw must not be touched
 * after kmem_cache_free().
 */
void inet_twsk_free(struct inet_timewait_sock *tw)
{
	struct module *owner = tw->tw_prot->owner;
	twsk_destructor((struct sock *)tw);
#ifdef SOCK_REFCNT_DEBUG
	pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw);
#endif
	kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
	module_put(owner);
}
0078 
0079 void inet_twsk_put(struct inet_timewait_sock *tw)
0080 {
0081     if (refcount_dec_and_test(&tw->tw_refcnt))
0082         inet_twsk_free(tw);
0083 }
0084 EXPORT_SYMBOL_GPL(inet_twsk_put);
0085 
/* Publish @tw at the head of an ehash nulls chain; visible to RCU
 * readers immediately.
 */
static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
				   struct hlist_nulls_head *list)
{
	hlist_nulls_add_head_rcu(&tw->tw_node, list);
}
0091 
/* Link @tw onto a bind bucket's owner list; the bhash bucket lock is
 * held by the caller (see inet_twsk_hashdance()).
 */
static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
				    struct hlist_head *list)
{
	hlist_add_head(&tw->tw_bind_node, list);
}
0097 
/*
 * Enter the time wait state. This is called with locally disabled BH.
 * Essentially we whip up a timewait bucket, copy the relevant info into it
 * from the SK, and mess with hash chains and list linkage.
 */
void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
			 struct inet_hashinfo *hashinfo)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
	spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
	struct inet_bind_hashbucket *bhead;
	/* Step 1: Put TW into bind hash. Original socket stays there too.
	   Note, that any socket with inet->num != 0 MUST be bound in
	   binding cache, even if it is closed.
	 */
	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
			hashinfo->bhash_size)];
	spin_lock(&bhead->lock);
	tw->tw_tb = icsk->icsk_bind_hash;
	WARN_ON(!icsk->icsk_bind_hash);
	inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
	spin_unlock(&bhead->lock);

	spin_lock(lock);

	/* Step 2: Hash TW into the ehash chain. */
	inet_twsk_add_node_rcu(tw, &ehead->chain);

	/* Step 3: Remove SK from hash chain */
	if (__sk_nulls_del_node_init_rcu(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);

	spin_unlock(lock);

	/* tw_refcnt is set to 3 because we have :
	 * - one reference for bhash chain.
	 * - one reference for ehash chain.
	 * - one reference for timer.
	 * We can use atomic_set() because prior spin_lock()/spin_unlock()
	 * committed into memory all tw fields.
	 * Also note that after this point, we lost our implicit reference
	 * so we are not allowed to use tw anymore.
	 */
	refcount_set(&tw->tw_refcnt, 3);
}
EXPORT_SYMBOL_GPL(inet_twsk_hashdance);
0145 
/* Timer callback: the TIME_WAIT period expired, tear the socket down. */
static void tw_timer_handler(struct timer_list *t)
{
	struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);

	inet_twsk_kill(tw);
}
0152 
/* Allocate and initialize a timewait socket mirroring @sk's identity.
 * Returns NULL if the death row's bucket limit is reached or the slab
 * allocation fails.  The returned tw has tw_refcnt == 0; the caller
 * publishes it (see inet_twsk_hashdance()).
 */
struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
					   struct inet_timewait_death_row *dr,
					   const int state)
{
	struct inet_timewait_sock *tw;

	/* tw_refcount holds one reference per live tw plus one for the
	 * death row itself, hence the "- 1" when comparing against the
	 * sysctl limit.
	 */
	if (refcount_read(&dr->tw_refcount) - 1 >=
	    READ_ONCE(dr->sysctl_max_tw_buckets))
		return NULL;

	tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
			      GFP_ATOMIC);
	if (tw) {
		const struct inet_sock *inet = inet_sk(sk);

		tw->tw_dr	    = dr;
		/* Give us an identity. */
		tw->tw_daddr	    = inet->inet_daddr;
		tw->tw_rcv_saddr    = inet->inet_rcv_saddr;
		tw->tw_bound_dev_if = sk->sk_bound_dev_if;
		tw->tw_tos	    = inet->tos;
		tw->tw_num	    = inet->inet_num;
		tw->tw_state	    = TCP_TIME_WAIT;
		tw->tw_substate	    = state;
		tw->tw_sport	    = inet->inet_sport;
		tw->tw_dport	    = inet->inet_dport;
		tw->tw_family	    = sk->sk_family;
		tw->tw_reuse	    = sk->sk_reuse;
		tw->tw_reuseport    = sk->sk_reuseport;
		tw->tw_hash	    = sk->sk_hash;
		tw->tw_ipv6only	    = 0;
		tw->tw_transparent  = inet->transparent;
		tw->tw_prot	    = sk->sk_prot_creator;
		atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
		twsk_net_set(tw, sock_net(sk));
		/* Pinned so the expiry runs on the scheduling CPU. */
		timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED);
		/*
		 * Because we use RCU lookups, we should not set tw_refcnt
		 * to a non null value before everything is setup for this
		 * timewait socket.
		 */
		refcount_set(&tw->tw_refcnt, 0);

		/* Pin the protocol module while the tw sock exists. */
		__module_get(tw->tw_prot->owner);
	}

	return tw;
}
EXPORT_SYMBOL_GPL(inet_twsk_alloc);
0202 
0203 /* These are always called from BH context.  See callers in
0204  * tcp_input.c to verify this.
0205  */
0206 
/* This is for handling early-kills of TIME_WAIT sockets.
 * Warning : consume reference.
 * Caller should not access tw anymore.
 */
void inet_twsk_deschedule_put(struct inet_timewait_sock *tw)
{
	/* del_timer_sync() returns nonzero only if the timer was still
	 * pending, i.e. tw_timer_handler() will not run — so we must do
	 * the teardown ourselves.
	 */
	if (del_timer_sync(&tw->tw_timer))
		inet_twsk_kill(tw);
	/* Drop the caller's reference. */
	inet_twsk_put(tw);
}
EXPORT_SYMBOL(inet_twsk_deschedule_put);
0218 
/* Arm (or re-arm) the TIME_WAIT timer for @tw.
 * @rearm == false: first schedule — bumps the MIB counters, starts the
 * timer and takes a death-row reference.  @rearm == true: only extends
 * an already-pending timer.
 */
void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
{
	/* timeout := RTO * 3.5
	 *
	 * 3.5 = 1+2+0.5 to wait for two retransmits.
	 *
	 * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
	 * our ACK acking that FIN can be lost. If N subsequent retransmitted
	 * FINs (or previous segments) are lost (probability of such event
	 * is p^(N+1), where p is probability to lose single packet and
	 * time to detect the loss is about RTO*(2^N - 1) with exponential
	 * backoff). Normal timewait length is calculated so, that we
	 * waited at least for one retransmitted FIN (maximal RTO is 120sec).
	 * [ BTW Linux. following BSD, violates this requirement waiting
	 *   only for 60sec, we should wait at least for 240 secs.
	 *   Well, 240 consumes too much of resources 8)
	 * ]
	 * This interval is not reduced to catch old duplicate and
	 * responses to our wandering segments living for two MSLs.
	 * However, if we use PAWS to detect
	 * old duplicates, we can reduce the interval to bounds required
	 * by RTO, rather than MSL. So, if peer understands PAWS, we
	 * kill tw bucket after 3.5*RTO (it is important that this number
	 * is greater than TS tick!) and detect old duplicates with help
	 * of PAWS.
	 */

	if (!rearm) {
		/* Short timeouts count as "killed" in the MIB stats. */
		bool kill = timeo <= 4*HZ;

		__NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED :
						     LINUX_MIB_TIMEWAITED);
		/* First schedule: the timer must not already be pending. */
		BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo));
		refcount_inc(&tw->tw_dr->tw_refcount);
	} else {
		mod_timer_pending(&tw->tw_timer, jiffies + timeo);
	}
}
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
0258 
/* Kill every TIME_WAIT socket of @family whose network namespace is
 * dying (ns.count == 0).  Walks each ehash bucket under RCU, dropping
 * the read lock around the actual teardown and restarting the chain
 * scan afterwards.
 */
void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
{
	struct inet_timewait_sock *tw;
	struct sock *sk;
	struct hlist_nulls_node *node;
	unsigned int slot;

	for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
		struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
restart_rcu:
		cond_resched();
		rcu_read_lock();
restart:
		sk_nulls_for_each_rcu(sk, node, &head->chain) {
			if (sk->sk_state != TCP_TIME_WAIT)
				continue;
			tw = inet_twsk(sk);
			/* Skip sockets of other families or of still-live
			 * namespaces.
			 */
			if ((tw->tw_family != family) ||
			    refcount_read(&twsk_net(tw)->ns.count))
				continue;

			/* A zero refcnt means the tw is being freed. */
			if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
				continue;

			/* Re-check under our reference: the slab object may
			 * have been reused for a different socket meanwhile.
			 */
			if (unlikely((tw->tw_family != family) ||
				     refcount_read(&twsk_net(tw)->ns.count))) {
				inet_twsk_put(tw);
				goto restart;
			}

			/* Teardown needs BHs off and no RCU lock held;
			 * rescan the chain from scratch afterwards.
			 */
			rcu_read_unlock();
			local_bh_disable();
			inet_twsk_deschedule_put(tw);
			local_bh_enable();
			goto restart_rcu;
		}
		/* If the nulls value we got at the end of this lookup is
		 * not the expected one, we must restart lookup.
		 * We probably met an item that was moved to another chain.
		 */
		if (get_nulls_value(node) != slot)
			goto restart;
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(inet_twsk_purge);