Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-or-later */
0002 /*
0003  * INET     An implementation of the TCP/IP protocol suite for the LINUX
0004  *      operating system.  INET is implemented using the BSD Socket
0005  *      interface as the means of communication with the user level.
0006  *
0007  * Authors: Lotsa people, from code originally in tcp
0008  */
0009 
0010 #ifndef _INET_HASHTABLES_H
0011 #define _INET_HASHTABLES_H
0012 
0013 
0014 #include <linux/interrupt.h>
0015 #include <linux/ip.h>
0016 #include <linux/ipv6.h>
0017 #include <linux/list.h>
0018 #include <linux/slab.h>
0019 #include <linux/socket.h>
0020 #include <linux/spinlock.h>
0021 #include <linux/types.h>
0022 #include <linux/wait.h>
0023 
0024 #include <net/inet_connection_sock.h>
0025 #include <net/inet_sock.h>
0026 #include <net/sock.h>
0027 #include <net/route.h>
0028 #include <net/tcp_states.h>
0029 #include <net/netns/hash.h>
0030 
0031 #include <linux/refcount.h>
0032 #include <asm/byteorder.h>
0033 
/* Hash bucket for all connections with a full identity, no wildcards.
 * The 'e' prefix stands for Establish, but we really put all sockets
 * but LISTEN ones in here.
 */
struct inet_ehash_bucket {
    struct hlist_nulls_head chain;  /* nulls-terminated chain (see LISTENING_NULLS_BASE comment below) */
};
0041 
0042 /* There are a few simple rules, which allow for local port reuse by
0043  * an application.  In essence:
0044  *
0045  *  1) Sockets bound to different interfaces may share a local port.
0046  *     Failing that, goto test 2.
0047  *  2) If all sockets have sk->sk_reuse set, and none of them are in
0048  *     TCP_LISTEN state, the port may be shared.
0049  *     Failing that, goto test 3.
0050  *  3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
0051  *     address, and none of them are the same, the port may be
0052  *     shared.
0053  *     Failing this, the port cannot be shared.
0054  *
0055  * The interesting point, is test #2.  This is what an FTP server does
0056  * all day.  To optimize this case we use a specific flag bit defined
0057  * below.  As we add sockets to a bind bucket list, we perform a
0058  * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
0059  * As long as all sockets added to a bind bucket pass this test,
0060  * the flag bit will be set.
0061  * The resulting situation is that tcp_v[46]_verify_bind() can just check
0062  * for this flag bit, if it is set and the socket trying to bind has
0063  * sk->sk_reuse set, we don't even have to walk the owners list at all,
0064  * we return that it is ok to bind this socket to the requested local port.
0065  *
0066  * Sounds like a lot of work, but it is worth it.  In a more naive
0067  * implementation (ie. current FreeBSD etc.) the entire list of ports
0068  * must be walked for each data port opened by an ftp server.  Needless
0069  * to say, this does not scale at all.  With a couple thousand FTP
0070  * users logged onto your box, isn't it nice to know that new data
0071  * ports are created in O(1) time?  I thought so. ;-)   -DaveM
0072  */
/* Values for inet_bind_bucket::fastreuseport.  NOTE(review): presumably
 * ANY lets any SO_REUSEPORT socket share the port while STRICT restricts
 * sharing further (e.g. by bound address) — confirm against the
 * bind-conflict code that consumes these.
 */
#define FASTREUSEPORT_ANY   1
#define FASTREUSEPORT_STRICT    2
0075 
/* One entry of the bind hash table: a single local port (within a netns
 * and L3 master device) plus cached state used to fast-path bind(2)
 * conflict checks, as described in the big comment above.
 */
struct inet_bind_bucket {
    possible_net_t      ib_net;     /* owning netns; read via ib_net() below */
    int         l3mdev;
    unsigned short      port;       /* local port this bucket represents */
    signed char     fastreuse;      /* NOTE(review): cached SO_REUSEADDR verdict — confirm in bind code */
    signed char     fastreuseport;  /* 0 or FASTREUSEPORT_ANY/STRICT */
    kuid_t          fastuid;        /* uid associated with the fastreuseport cache */
#if IS_ENABLED(CONFIG_IPV6)
    struct in6_addr     fast_v6_rcv_saddr;
#endif
    __be32          fast_rcv_saddr;
    unsigned short      fast_sk_family;
    bool            fast_ipv6_only;
    struct hlist_node   node;       /* link in inet_bind_hashbucket::chain */
    struct hlist_head   owners;     /* sockets bound to this port */
};
0092 
0093 static inline struct net *ib_net(struct inet_bind_bucket *ib)
0094 {
0095     return read_pnet(&ib->ib_net);
0096 }
0097 
/* Iterate 'tb' over every inet_bind_bucket linked on hash chain 'head'. */
#define inet_bind_bucket_for_each(tb, head) \
    hlist_for_each_entry(tb, head, node)
0100 
/* One slot of the bind hash table: a lock protecting the chain of
 * inet_bind_bucket entries that hash to this slot.
 */
struct inet_bind_hashbucket {
    spinlock_t      lock;
    struct hlist_head   chain;
};
0105 
/* Sockets can be hashed in established or listening table.
 * We must use different 'nulls' end-of-chain value for all hash buckets :
 * A socket might transition from ESTABLISH to LISTEN state without
 * RCU grace period. A lookup in ehash table needs to handle this case.
 */
#define LISTENING_NULLS_BASE (1U << 29)
struct inet_listen_hashbucket {
    spinlock_t      lock;
    struct hlist_nulls_head nulls_head; /* nulls value >= LISTENING_NULLS_BASE */
};
0116 
/* This is for listening sockets, thus all sockets which possess wildcards. */
/* (The second listener table, lhash2, is hashed by port+address instead.) */
#define INET_LHTABLE_SIZE   32  /* Yes, really, this is all you need. */
0119 
/* Container for the socket hash tables: established (ehash), local port
 * bindings (bhash) and the port+address listener table (lhash2).
 */
struct inet_hashinfo {
    /* This is for sockets with full identity only.  Sockets here will
     * always be without wildcards and will have the following invariant:
     *
     *          TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
     *
     */
    struct inet_ehash_bucket    *ehash;
    spinlock_t          *ehash_locks;   /* bucket locks; indexed via ehash_locks_mask */
    unsigned int            ehash_mask;     /* table size - 1, used as slot mask */
    unsigned int            ehash_locks_mask;

    /* Ok, let's try this, I give up, we do need a local binding
     * TCP hash as well as the others for fast bind/connect.
     */
    struct kmem_cache       *bind_bucket_cachep;    /* allocator for struct inet_bind_bucket */
    struct inet_bind_hashbucket *bhash;
    unsigned int            bhash_size;

    /* The 2nd listener table hashed by local port and address */
    unsigned int            lhash2_mask;    /* table size - 1, used as slot mask */
    struct inet_listen_hashbucket   *lhash2;
};
0143 
0144 static inline struct inet_listen_hashbucket *
0145 inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash)
0146 {
0147     return &h->lhash2[hash & h->lhash2_mask];
0148 }
0149 
0150 static inline struct inet_ehash_bucket *inet_ehash_bucket(
0151     struct inet_hashinfo *hashinfo,
0152     unsigned int hash)
0153 {
0154     return &hashinfo->ehash[hash & hashinfo->ehash_mask];
0155 }
0156 
0157 static inline spinlock_t *inet_ehash_lockp(
0158     struct inet_hashinfo *hashinfo,
0159     unsigned int hash)
0160 {
0161     return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
0162 }
0163 
0164 int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo);
0165 
0166 static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h)
0167 {
0168     kfree(h->lhash2);
0169     h->lhash2 = NULL;
0170 }
0171 
0172 static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
0173 {
0174     kvfree(hashinfo->ehash_locks);
0175     hashinfo->ehash_locks = NULL;
0176 }
0177 
/* Allocate a bind bucket for (net, l3mdev, snum) and link it on 'head'. */
struct inet_bind_bucket *
inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
            struct inet_bind_hashbucket *head,
            const unsigned short snum, int l3mdev);
/* Unhash and free 'tb' (NOTE(review): presumably only once its owners
 * list is empty — confirm in inet_hashtables.c). */
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
                  struct inet_bind_bucket *tb);
0184 
0185 static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
0186                    const u32 bhash_size)
0187 {
0188     return (lport + net_hash_mix(net)) & (bhash_size - 1);
0189 }
0190 
/* Attach 'sk' to bind bucket 'tb' for local port snum. */
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
            const unsigned short snum);

/* Make 'child' share its parent's bind bucket. Caller must disable
 * local BH processing. */
int __inet_inherit_port(const struct sock *sk, struct sock *child);

/* Release sk's hold on its local port / bind bucket. */
void inet_put_port(struct sock *sk);

/* Set up the lhash2 listener table: boot-time sizing via the _init
 * variant, runtime allocation via the _mod variant. */
void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
             unsigned long numentries, int scale,
             unsigned long low_limit,
             unsigned long high_limit);
int inet_hashinfo2_init_mod(struct inet_hashinfo *h);

/* Insert sk into the established table, optionally replacing 'osk';
 * NOTE(review): *found_dup_sk presumably reports an identity clash —
 * confirm against inet_hashtables.c. */
bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk,
             bool *found_dup_sk);
int __inet_hash(struct sock *sk, struct sock *osk);
int inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
0211 
/* Find a listening socket matching the flow; hnum is the local port in
 * host byte order.  The result carries no reference — see the
 * *refcounted handling in __inet_lookup() below.
 */
struct sock *__inet_lookup_listener(struct net *net,
                    struct inet_hashinfo *hashinfo,
                    struct sk_buff *skb, int doff,
                    const __be32 saddr, const __be16 sport,
                    const __be32 daddr,
                    const unsigned short hnum,
                    const int dif, const int sdif);
0219 
0220 static inline struct sock *inet_lookup_listener(struct net *net,
0221         struct inet_hashinfo *hashinfo,
0222         struct sk_buff *skb, int doff,
0223         __be32 saddr, __be16 sport,
0224         __be32 daddr, __be16 dport, int dif, int sdif)
0225 {
0226     return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
0227                       daddr, ntohs(dport), dif, sdif);
0228 }
0229 
/* Socket demux engine toys. */
/* What happens here is ugly; there's a pair of adjacent fields in
   struct inet_sock; __be16 dport followed by __u16 num.  We want to
   search by pair, so we combine the keys into a single 32bit value
   and compare with 32bit value read from &...->dport.  Let's at least
   make sure that it's not mixed with anything else...
   On 64bit targets we combine comparisons with pair of adjacent __be32
   fields in the same way.
*/
/* Pack (sport, dport) so the result matches the in-memory layout of the
 * socket's dport/num pair on this endianness.
 */
#ifdef __BIG_ENDIAN
#define INET_COMBINED_PORTS(__sport, __dport) \
    ((__force __portpair)(((__force __u32)(__be16)(__sport) << 16) | (__u32)(__dport)))
#else /* __LITTLE_ENDIAN */
#define INET_COMBINED_PORTS(__sport, __dport) \
    ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport)))
#endif

/* Declare a const 64-bit cookie combining (saddr, daddr), laid out to
 * match the socket's adjacent address fields on this endianness.
 */
#ifdef __BIG_ENDIAN
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
    const __addrpair __name = (__force __addrpair) ( \
                   (((__force __u64)(__be32)(__saddr)) << 32) | \
                   ((__force __u64)(__be32)(__daddr)))
#else /* __LITTLE_ENDIAN */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
    const __addrpair __name = (__force __addrpair) ( \
                   (((__force __u64)(__be32)(__daddr)) << 32) | \
                   ((__force __u64)(__be32)(__saddr)))
#endif /* __BIG_ENDIAN */
0258 
0259 static inline bool inet_match(struct net *net, const struct sock *sk,
0260                   const __addrpair cookie, const __portpair ports,
0261                   int dif, int sdif)
0262 {
0263     if (!net_eq(sock_net(sk), net) ||
0264         sk->sk_portpair != ports ||
0265         sk->sk_addrpair != cookie)
0266             return false;
0267 
0268     /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */
0269     return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif,
0270                     sdif);
0271 }
0272 
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
 * not check it for lookups anymore, thanks Alexey. -DaveM
 */
/* Exact 4-tuple lookup in the established table; hnum is the local port
 * in host byte order.
 */
struct sock *__inet_lookup_established(struct net *net,
                       struct inet_hashinfo *hashinfo,
                       const __be32 saddr, const __be16 sport,
                       const __be32 daddr, const u16 hnum,
                       const int dif, const int sdif);
0281 
0282 static inline struct sock *
0283     inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
0284                 const __be32 saddr, const __be16 sport,
0285                 const __be32 daddr, const __be16 dport,
0286                 const int dif)
0287 {
0288     return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
0289                      ntohs(dport), dif, 0);
0290 }
0291 
0292 static inline struct sock *__inet_lookup(struct net *net,
0293                      struct inet_hashinfo *hashinfo,
0294                      struct sk_buff *skb, int doff,
0295                      const __be32 saddr, const __be16 sport,
0296                      const __be32 daddr, const __be16 dport,
0297                      const int dif, const int sdif,
0298                      bool *refcounted)
0299 {
0300     u16 hnum = ntohs(dport);
0301     struct sock *sk;
0302 
0303     sk = __inet_lookup_established(net, hashinfo, saddr, sport,
0304                        daddr, hnum, dif, sdif);
0305     *refcounted = true;
0306     if (sk)
0307         return sk;
0308     *refcounted = false;
0309     return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
0310                       sport, daddr, hnum, dif, sdif);
0311 }
0312 
0313 static inline struct sock *inet_lookup(struct net *net,
0314                        struct inet_hashinfo *hashinfo,
0315                        struct sk_buff *skb, int doff,
0316                        const __be32 saddr, const __be16 sport,
0317                        const __be32 daddr, const __be16 dport,
0318                        const int dif)
0319 {
0320     struct sock *sk;
0321     bool refcounted;
0322 
0323     sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
0324                dport, dif, 0, &refcounted);
0325 
0326     if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
0327         sk = NULL;
0328     return sk;
0329 }
0330 
0331 static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
0332                          struct sk_buff *skb,
0333                          int doff,
0334                          const __be16 sport,
0335                          const __be16 dport,
0336                          const int sdif,
0337                          bool *refcounted)
0338 {
0339     struct sock *sk = skb_steal_sock(skb, refcounted);
0340     const struct iphdr *iph = ip_hdr(skb);
0341 
0342     if (sk)
0343         return sk;
0344 
0345     return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
0346                  doff, iph->saddr, sport,
0347                  iph->daddr, dport, inet_iif(skb), sdif,
0348                  refcounted);
0349 }
0350 
/* IPv6 established-table hash; lport is host byte order (u16), fport is
 * network byte order (__be16).
 */
u32 inet6_ehashfn(const struct net *net,
          const struct in6_addr *laddr, const u16 lport,
          const struct in6_addr *faddr, const __be16 fport);
0354 
/* Set the IPv4 destination address, mirroring it into the v4-mapped
 * IPv6 field when IPv6 is enabled so both views stay in sync.
 */
static inline void sk_daddr_set(struct sock *sk, __be32 addr)
{
    sk->sk_daddr = addr; /* alias of inet_daddr */
#if IS_ENABLED(CONFIG_IPV6)
    ipv6_addr_set_v4mapped(addr, &sk->sk_v6_daddr);
#endif
}
0362 
/* Set the IPv4 receive (local) address, mirroring it into the v4-mapped
 * IPv6 field when IPv6 is enabled so both views stay in sync.
 */
static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
{
    sk->sk_rcv_saddr = addr; /* alias of inet_rcv_saddr */
#if IS_ENABLED(CONFIG_IPV6)
    ipv6_addr_set_v4mapped(addr, &sk->sk_v6_rcv_saddr);
#endif
}
0370 
/* Pick a source port for an outgoing connection and hash the socket.
 * port_offset seeds the port search; check_established rejects candidate
 * ports whose 4-tuple clashes (NOTE(review): confirm exact contract,
 * including the timewait out-parameter, in inet_hashtables.c).
 */
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
            struct sock *sk, u64 port_offset,
            int (*check_established)(struct inet_timewait_death_row *,
                         struct sock *, __u16,
                         struct inet_timewait_sock **));

/* TCP front end to __inet_hash_connect(). */
int inet_hash_connect(struct inet_timewait_death_row *death_row,
              struct sock *sk);
#endif /* _INET_HASHTABLES_H */