0035 #ifndef _SOCK_H
0036 #define _SOCK_H
0037
0038 #include <linux/hardirq.h>
0039 #include <linux/kernel.h>
0040 #include <linux/list.h>
0041 #include <linux/list_nulls.h>
0042 #include <linux/timer.h>
0043 #include <linux/cache.h>
0044 #include <linux/bitops.h>
0045 #include <linux/lockdep.h>
0046 #include <linux/netdevice.h>
0047 #include <linux/skbuff.h> /* struct sk_buff */
0048 #include <linux/mm.h>
0049 #include <linux/security.h>
0050 #include <linux/slab.h>
0051 #include <linux/uaccess.h>
0052 #include <linux/page_counter.h>
0053 #include <linux/memcontrol.h>
0054 #include <linux/static_key.h>
0055 #include <linux/sched.h>
0056 #include <linux/wait.h>
0057 #include <linux/cgroup-defs.h>
0058 #include <linux/rbtree.h>
0059 #include <linux/rculist_nulls.h>
0060 #include <linux/poll.h>
0061 #include <linux/sockptr.h>
0062 #include <linux/indirect_call_wrapper.h>
0063 #include <linux/atomic.h>
0064 #include <linux/refcount.h>
0065 #include <linux/llist.h>
0066 #include <net/dst.h>
0067 #include <net/checksum.h>
0068 #include <net/tcp_states.h>
0069 #include <linux/net_tstamp.h>
0070 #include <net/l3mdev.h>
0071 #include <uapi/linux/socket.h>
0072
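/*
 * Per-socket debug printout helper.  SOCK_DEBUG() only emits anything when
 * SOCK_DEBUGGING is defined and the socket has the SOCK_DBG flag set
 * (SO_DEBUG); otherwise it compiles down to the empty static inline below.
 */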
0080 #define SOCK_DEBUGGING
0081 #ifdef SOCK_DEBUGGING
0082 #define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \
0083 printk(KERN_DEBUG msg); } while (0)
0084 #else
0085
0086 static inline __printf(2, 3)
0087 void SOCK_DEBUG(const struct sock *sk, const char *msg, ...)
0088 {
0089 }
0090 #endif
0091
0092
0093
0094
0095
0096 typedef struct {
0097 spinlock_t slock;
0098 int owned;
0099 wait_queue_head_t wq;
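/*
 * 'owned' and 'wq' give the socket lock its mutex-like semantics on top of
 * 'slock'; the dep_map below lets lockdep track that pseudo-mutex in
 * addition to the spinlock itself.
 */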
0106 #ifdef CONFIG_DEBUG_LOCK_ALLOC
0107 struct lockdep_map dep_map;
0108 #endif
0109 } socket_lock_t;
0110
0111 struct sock;
0112 struct proto;
0113 struct net;
0114
0115 typedef __u32 __bitwise __portpair;
0116 typedef __u64 __bitwise __addrpair;
0117
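/*
 * struct sock_common - minimal network-layer representation of a socket.
 *
 * This is the part shared by struct sock, struct inet_timewait_sock and
 * request socks, so lookup code can operate on any of them.  Field names are
 * prefixed skc_; struct sock re-exports them via the sk_* #defines further
 * down.  Notable members: skc_daddr/skc_rcv_saddr (IPv4 peer/bound
 * addresses), skc_dport/skc_num (ports), skc_family, skc_state,
 * skc_bound_dev_if, skc_prot (protocol ops), skc_net (owning netns) and
 * skc_refcnt.  The region between skc_dontcopy_begin and skc_dontcopy_end is
 * not copied by sock_copy() when a socket is cloned.
 */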
0163 struct sock_common {
0164 union {
0165 __addrpair skc_addrpair;
0166 struct {
0167 __be32 skc_daddr;
0168 __be32 skc_rcv_saddr;
0169 };
0170 };
0171 union {
0172 unsigned int skc_hash;
0173 __u16 skc_u16hashes[2];
0174 };
0175
0176 union {
0177 __portpair skc_portpair;
0178 struct {
0179 __be16 skc_dport;
0180 __u16 skc_num;
0181 };
0182 };
0183
0184 unsigned short skc_family;
0185 volatile unsigned char skc_state;
0186 unsigned char skc_reuse:4;
0187 unsigned char skc_reuseport:1;
0188 unsigned char skc_ipv6only:1;
0189 unsigned char skc_net_refcnt:1;
0190 int skc_bound_dev_if;
0191 union {
0192 struct hlist_node skc_bind_node;
0193 struct hlist_node skc_portaddr_node;
0194 };
0195 struct proto *skc_prot;
0196 possible_net_t skc_net;
0197
0198 #if IS_ENABLED(CONFIG_IPV6)
0199 struct in6_addr skc_v6_daddr;
0200 struct in6_addr skc_v6_rcv_saddr;
0201 #endif
0202
0203 atomic64_t skc_cookie;
0204
0205
0206
0207
0208
0209
0210 union {
0211 unsigned long skc_flags;
0212 struct sock *skc_listener;
0213 struct inet_timewait_death_row *skc_tw_dr;
0214 };
0215
0216
0217
0218
0219
0220 int skc_dontcopy_begin[0];
0221
0222 union {
0223 struct hlist_node skc_node;
0224 struct hlist_nulls_node skc_nulls_node;
0225 };
0226 unsigned short skc_tx_queue_mapping;
0227 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
0228 unsigned short skc_rx_queue_mapping;
0229 #endif
0230 union {
0231 int skc_incoming_cpu;
0232 u32 skc_rcv_wnd;
0233 u32 skc_tw_rcv_nxt;
0234 };
0235
0236 refcount_t skc_refcnt;
0237
0238 int skc_dontcopy_end[0];
0239 union {
0240 u32 skc_rxhash;
0241 u32 skc_window_clamp;
0242 u32 skc_tw_snd_nxt;
0243 };
0244
0245 };
0246
0247 struct bpf_local_storage;
0248 struct sk_filter;
0249
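/*
 * struct sock - network-layer representation of a socket.
 *
 * Condensed summary (the original field-by-field documentation is omitted
 * here): it embeds struct sock_common, whose fields are re-exported by the
 * #defines below, and then carries the receive/error/write queues, the
 * backlog, buffer accounting (sk_rcvbuf, sk_sndbuf, sk_forward_alloc,
 * sk_wmem_*), the socket lock, routing and GSO state, timestamping state,
 * and the callback hooks (sk_state_change, sk_data_ready, sk_write_space,
 * sk_error_report, sk_backlog_rcv, sk_destruct) that protocols override.
 */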
0352 struct sock {
0353
0354
0355
0356
0357 struct sock_common __sk_common;
0358 #define sk_node __sk_common.skc_node
0359 #define sk_nulls_node __sk_common.skc_nulls_node
0360 #define sk_refcnt __sk_common.skc_refcnt
0361 #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping
0362 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
0363 #define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping
0364 #endif
0365
0366 #define sk_dontcopy_begin __sk_common.skc_dontcopy_begin
0367 #define sk_dontcopy_end __sk_common.skc_dontcopy_end
0368 #define sk_hash __sk_common.skc_hash
0369 #define sk_portpair __sk_common.skc_portpair
0370 #define sk_num __sk_common.skc_num
0371 #define sk_dport __sk_common.skc_dport
0372 #define sk_addrpair __sk_common.skc_addrpair
0373 #define sk_daddr __sk_common.skc_daddr
0374 #define sk_rcv_saddr __sk_common.skc_rcv_saddr
0375 #define sk_family __sk_common.skc_family
0376 #define sk_state __sk_common.skc_state
0377 #define sk_reuse __sk_common.skc_reuse
0378 #define sk_reuseport __sk_common.skc_reuseport
0379 #define sk_ipv6only __sk_common.skc_ipv6only
0380 #define sk_net_refcnt __sk_common.skc_net_refcnt
0381 #define sk_bound_dev_if __sk_common.skc_bound_dev_if
0382 #define sk_bind_node __sk_common.skc_bind_node
0383 #define sk_prot __sk_common.skc_prot
0384 #define sk_net __sk_common.skc_net
0385 #define sk_v6_daddr __sk_common.skc_v6_daddr
0386 #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
0387 #define sk_cookie __sk_common.skc_cookie
0388 #define sk_incoming_cpu __sk_common.skc_incoming_cpu
0389 #define sk_flags __sk_common.skc_flags
0390 #define sk_rxhash __sk_common.skc_rxhash
0391
0392
0393 struct dst_entry __rcu *sk_rx_dst;
0394 int sk_rx_dst_ifindex;
0395 u32 sk_rx_dst_cookie;
0396
0397 socket_lock_t sk_lock;
0398 atomic_t sk_drops;
0399 int sk_rcvlowat;
0400 struct sk_buff_head sk_error_queue;
0401 struct sk_buff_head sk_receive_queue;
0402
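/*
 * The backlog queue is special: it is always used with the per-socket
 * spinlock held and needs low-latency access, so it is open coded here
 * instead of using an sk_buff_head.  Keeping rmem_alloc in this struct also
 * keeps it close to sk_receive_queue for cache locality.
 */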
0410 struct {
0411 atomic_t rmem_alloc;
0412 int len;
0413 struct sk_buff *head;
0414 struct sk_buff *tail;
0415 } sk_backlog;
0416
0417 #define sk_rmem_alloc sk_backlog.rmem_alloc
0418
0419 int sk_forward_alloc;
0420 u32 sk_reserved_mem;
0421 #ifdef CONFIG_NET_RX_BUSY_POLL
0422 unsigned int sk_ll_usec;
0423
0424 unsigned int sk_napi_id;
0425 #endif
0426 int sk_rcvbuf;
0427
0428 struct sk_filter __rcu *sk_filter;
0429 union {
0430 struct socket_wq __rcu *sk_wq;
0431
0432 struct socket_wq *sk_wq_raw;
0433
0434 };
0435 #ifdef CONFIG_XFRM
0436 struct xfrm_policy __rcu *sk_policy[2];
0437 #endif
0438
0439 struct dst_entry __rcu *sk_dst_cache;
0440 atomic_t sk_omem_alloc;
0441 int sk_sndbuf;
0442
0443
0444 int sk_wmem_queued;
0445 refcount_t sk_wmem_alloc;
0446 unsigned long sk_tsq_flags;
0447 union {
0448 struct sk_buff *sk_send_head;
0449 struct rb_root tcp_rtx_queue;
0450 };
0451 struct sk_buff_head sk_write_queue;
0452 __s32 sk_peek_off;
0453 int sk_write_pending;
0454 __u32 sk_dst_pending_confirm;
0455 u32 sk_pacing_status;
0456 long sk_sndtimeo;
0457 struct timer_list sk_timer;
0458 __u32 sk_priority;
0459 __u32 sk_mark;
0460 unsigned long sk_pacing_rate;
0461 unsigned long sk_max_pacing_rate;
0462 struct page_frag sk_frag;
0463 netdev_features_t sk_route_caps;
0464 int sk_gso_type;
0465 unsigned int sk_gso_max_size;
0466 gfp_t sk_allocation;
0467 __u32 sk_txhash;
0468
0469
0470
0471
0472
0473 u8 sk_gso_disabled : 1,
0474 sk_kern_sock : 1,
0475 sk_no_check_tx : 1,
0476 sk_no_check_rx : 1,
0477 sk_userlocks : 4;
0478 u8 sk_pacing_shift;
0479 u16 sk_type;
0480 u16 sk_protocol;
0481 u16 sk_gso_max_segs;
0482 unsigned long sk_lingertime;
0483 struct proto *sk_prot_creator;
0484 rwlock_t sk_callback_lock;
0485 int sk_err,
0486 sk_err_soft;
0487 u32 sk_ack_backlog;
0488 u32 sk_max_ack_backlog;
0489 kuid_t sk_uid;
0490 u8 sk_txrehash;
0491 #ifdef CONFIG_NET_RX_BUSY_POLL
0492 u8 sk_prefer_busy_poll;
0493 u16 sk_busy_poll_budget;
0494 #endif
0495 spinlock_t sk_peer_lock;
0496 int sk_bind_phc;
0497 struct pid *sk_peer_pid;
0498 const struct cred *sk_peer_cred;
0499
0500 long sk_rcvtimeo;
0501 ktime_t sk_stamp;
0502 #if BITS_PER_LONG==32
0503 seqlock_t sk_stamp_seq;
0504 #endif
0505 u16 sk_tsflags;
0506 u8 sk_shutdown;
0507 atomic_t sk_tskey;
0508 atomic_t sk_zckey;
0509
0510 u8 sk_clockid;
0511 u8 sk_txtime_deadline_mode : 1,
0512 sk_txtime_report_errors : 1,
0513 sk_txtime_unused : 6;
0514
0515 struct socket *sk_socket;
0516 void *sk_user_data;
0517 #ifdef CONFIG_SECURITY
0518 void *sk_security;
0519 #endif
0520 struct sock_cgroup_data sk_cgrp_data;
0521 struct mem_cgroup *sk_memcg;
0522 void (*sk_state_change)(struct sock *sk);
0523 void (*sk_data_ready)(struct sock *sk);
0524 void (*sk_write_space)(struct sock *sk);
0525 void (*sk_error_report)(struct sock *sk);
0526 int (*sk_backlog_rcv)(struct sock *sk,
0527 struct sk_buff *skb);
0528 #ifdef CONFIG_SOCK_VALIDATE_XMIT
0529 struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk,
0530 struct net_device *dev,
0531 struct sk_buff *skb);
0532 #endif
0533 void (*sk_destruct)(struct sock *sk);
0534 struct sock_reuseport __rcu *sk_reuseport_cb;
0535 #ifdef CONFIG_BPF_SYSCALL
0536 struct bpf_local_storage __rcu *sk_bpf_storage;
0537 #endif
0538 struct rcu_head sk_rcu;
0539 netns_tracker ns_tracker;
0540 };
0541
0542 enum sk_pacing {
0543 SK_PACING_NONE = 0,
0544 SK_PACING_NEEDED = 1,
0545 SK_PACING_FQ = 2,
0546 };
0547
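/*
 * Flag bits stored in the low bits of sk_user_data (the real pointer is
 * recovered by masking with SK_USER_DATA_PTRMASK):
 *
 * - SK_USER_DATA_NOCOPY: the pointer must not be copied when the socket is
 *   cloned, e.g. because it refers to a reference-counted object.
 * - SK_USER_DATA_BPF: sk_user_data is managed/owned by a BPF reuseport array.
 * - SK_USER_DATA_PSOCK: sk_user_data points to a psock object.
 */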
0563 #define SK_USER_DATA_NOCOPY 1UL
0564 #define SK_USER_DATA_BPF 2UL
0565 #define SK_USER_DATA_PSOCK 4UL
0566 #define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\
0567 SK_USER_DATA_PSOCK)
0568
0569
0570
0571
0572
0573 static inline bool sk_user_data_is_nocopy(const struct sock *sk)
0574 {
0575 return ((uintptr_t)sk->sk_user_data & SK_USER_DATA_NOCOPY);
0576 }
0577
0578 #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
0579
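/*
 * __locked_read_sk_user_data_with_flags - return the sk_user_data pointer
 * with the flag bits masked off, but only if every bit in @flags is set;
 * otherwise NULL.  The rcu_dereference_check() below is satisfied either by
 * an RCU read-side section or by holding sk_callback_lock.
 */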
0590 static inline void *
0591 __locked_read_sk_user_data_with_flags(const struct sock *sk,
0592 uintptr_t flags)
0593 {
0594 uintptr_t sk_user_data =
0595 (uintptr_t)rcu_dereference_check(__sk_user_data(sk),
0596 lockdep_is_held(&sk->sk_callback_lock));
0597
0598 WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK);
0599
0600 if ((sk_user_data & flags) == flags)
0601 return (void *)(sk_user_data & SK_USER_DATA_PTRMASK);
0602 return NULL;
0603 }
0604
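/*
 * __rcu_dereference_sk_user_data_with_flags - RCU-only variant of the helper
 * above: returns the sk_user_data pointer with the flag bits cleared if all
 * of @flags are set, NULL otherwise.
 */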
0613 static inline void *
0614 __rcu_dereference_sk_user_data_with_flags(const struct sock *sk,
0615 uintptr_t flags)
0616 {
0617 uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk));
0618
0619 WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK);
0620
0621 if ((sk_user_data & flags) == flags)
0622 return (void *)(sk_user_data & SK_USER_DATA_PTRMASK);
0623 return NULL;
0624 }
0625
0626 #define rcu_dereference_sk_user_data(sk) \
0627 __rcu_dereference_sk_user_data_with_flags(sk, 0)
0628 #define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \
0629 ({ \
0630 uintptr_t __tmp1 = (uintptr_t)(ptr), \
0631 __tmp2 = (uintptr_t)(flags); \
0632 WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \
0633 WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \
0634 rcu_assign_pointer(__sk_user_data((sk)), \
0635 __tmp1 | __tmp2); \
0636 })
0637 #define rcu_assign_sk_user_data(sk, ptr) \
0638 __rcu_assign_sk_user_data_with_flags(sk, ptr, 0)
0639
0640 static inline
0641 struct net *sock_net(const struct sock *sk)
0642 {
0643 return read_pnet(&sk->sk_net);
0644 }
0645
0646 static inline
0647 void sock_net_set(struct sock *sk, struct net *net)
0648 {
0649 write_pnet(&sk->sk_net, net);
0650 }
0651
0652
0653
0654
0655
0656
0657
0658
0659 #define SK_NO_REUSE 0
0660 #define SK_CAN_REUSE 1
0661 #define SK_FORCE_REUSE 2
0662
0663 int sk_set_peek_off(struct sock *sk, int val);
0664
0665 static inline int sk_peek_offset(const struct sock *sk, int flags)
0666 {
0667 if (unlikely(flags & MSG_PEEK)) {
0668 return READ_ONCE(sk->sk_peek_off);
0669 }
0670
0671 return 0;
0672 }
0673
0674 static inline void sk_peek_offset_bwd(struct sock *sk, int val)
0675 {
0676 s32 off = READ_ONCE(sk->sk_peek_off);
0677
0678 if (unlikely(off >= 0)) {
0679 off = max_t(s32, off - val, 0);
0680 WRITE_ONCE(sk->sk_peek_off, off);
0681 }
0682 }
0683
0684 static inline void sk_peek_offset_fwd(struct sock *sk, int val)
0685 {
0686 sk_peek_offset_bwd(sk, -val);
0687 }
0688
0689
0690
0691
0692 static inline struct sock *sk_entry(const struct hlist_node *node)
0693 {
0694 return hlist_entry(node, struct sock, sk_node);
0695 }
0696
0697 static inline struct sock *__sk_head(const struct hlist_head *head)
0698 {
0699 return hlist_entry(head->first, struct sock, sk_node);
0700 }
0701
0702 static inline struct sock *sk_head(const struct hlist_head *head)
0703 {
0704 return hlist_empty(head) ? NULL : __sk_head(head);
0705 }
0706
0707 static inline struct sock *__sk_nulls_head(const struct hlist_nulls_head *head)
0708 {
0709 return hlist_nulls_entry(head->first, struct sock, sk_nulls_node);
0710 }
0711
0712 static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head)
0713 {
0714 return hlist_nulls_empty(head) ? NULL : __sk_nulls_head(head);
0715 }
0716
0717 static inline struct sock *sk_next(const struct sock *sk)
0718 {
0719 return hlist_entry_safe(sk->sk_node.next, struct sock, sk_node);
0720 }
0721
0722 static inline struct sock *sk_nulls_next(const struct sock *sk)
0723 {
0724 return (!is_a_nulls(sk->sk_nulls_node.next)) ?
0725 hlist_nulls_entry(sk->sk_nulls_node.next,
0726 struct sock, sk_nulls_node) :
0727 NULL;
0728 }
0729
0730 static inline bool sk_unhashed(const struct sock *sk)
0731 {
0732 return hlist_unhashed(&sk->sk_node);
0733 }
0734
0735 static inline bool sk_hashed(const struct sock *sk)
0736 {
0737 return !sk_unhashed(sk);
0738 }
0739
0740 static inline void sk_node_init(struct hlist_node *node)
0741 {
0742 node->pprev = NULL;
0743 }
0744
0745 static inline void sk_nulls_node_init(struct hlist_nulls_node *node)
0746 {
0747 node->pprev = NULL;
0748 }
0749
0750 static inline void __sk_del_node(struct sock *sk)
0751 {
0752 __hlist_del(&sk->sk_node);
0753 }
0754
0755
0756 static inline bool __sk_del_node_init(struct sock *sk)
0757 {
0758 if (sk_hashed(sk)) {
0759 __sk_del_node(sk);
0760 sk_node_init(&sk->sk_node);
0761 return true;
0762 }
0763 return false;
0764 }
0765
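/*
 * sock_hold - grab a reference on a socket that is known to still be
 * referenced (sk_refcnt is not zero), e.g. because it was found in a hash
 * table or list.  Drop it with sock_put(); __sock_put() below drops a
 * reference that is known not to be the last one.
 */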
0772 static __always_inline void sock_hold(struct sock *sk)
0773 {
0774 refcount_inc(&sk->sk_refcnt);
0775 }
0776
0777
0778
0779
0780 static __always_inline void __sock_put(struct sock *sk)
0781 {
0782 refcount_dec(&sk->sk_refcnt);
0783 }
0784
0785 static inline bool sk_del_node_init(struct sock *sk)
0786 {
0787 bool rc = __sk_del_node_init(sk);
0788
0789 if (rc) {
0790
0791 WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
0792 __sock_put(sk);
0793 }
0794 return rc;
0795 }
0796 #define sk_del_node_init_rcu(sk) sk_del_node_init(sk)
0797
0798 static inline bool __sk_nulls_del_node_init_rcu(struct sock *sk)
0799 {
0800 if (sk_hashed(sk)) {
0801 hlist_nulls_del_init_rcu(&sk->sk_nulls_node);
0802 return true;
0803 }
0804 return false;
0805 }
0806
0807 static inline bool sk_nulls_del_node_init_rcu(struct sock *sk)
0808 {
0809 bool rc = __sk_nulls_del_node_init_rcu(sk);
0810
0811 if (rc) {
0812
0813 WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
0814 __sock_put(sk);
0815 }
0816 return rc;
0817 }
0818
0819 static inline void __sk_add_node(struct sock *sk, struct hlist_head *list)
0820 {
0821 hlist_add_head(&sk->sk_node, list);
0822 }
0823
0824 static inline void sk_add_node(struct sock *sk, struct hlist_head *list)
0825 {
0826 sock_hold(sk);
0827 __sk_add_node(sk, list);
0828 }
0829
0830 static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
0831 {
0832 sock_hold(sk);
0833 if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
0834 sk->sk_family == AF_INET6)
0835 hlist_add_tail_rcu(&sk->sk_node, list);
0836 else
0837 hlist_add_head_rcu(&sk->sk_node, list);
0838 }
0839
0840 static inline void sk_add_node_tail_rcu(struct sock *sk, struct hlist_head *list)
0841 {
0842 sock_hold(sk);
0843 hlist_add_tail_rcu(&sk->sk_node, list);
0844 }
0845
0846 static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
0847 {
0848 hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
0849 }
0850
0851 static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list)
0852 {
0853 hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
0854 }
0855
0856 static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
0857 {
0858 sock_hold(sk);
0859 __sk_nulls_add_node_rcu(sk, list);
0860 }
0861
0862 static inline void __sk_del_bind_node(struct sock *sk)
0863 {
0864 __hlist_del(&sk->sk_bind_node);
0865 }
0866
0867 static inline void sk_add_bind_node(struct sock *sk,
0868 struct hlist_head *list)
0869 {
0870 hlist_add_head(&sk->sk_bind_node, list);
0871 }
0872
0873 #define sk_for_each(__sk, list) \
0874 hlist_for_each_entry(__sk, list, sk_node)
0875 #define sk_for_each_rcu(__sk, list) \
0876 hlist_for_each_entry_rcu(__sk, list, sk_node)
0877 #define sk_nulls_for_each(__sk, node, list) \
0878 hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
0879 #define sk_nulls_for_each_rcu(__sk, node, list) \
0880 hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node)
0881 #define sk_for_each_from(__sk) \
0882 hlist_for_each_entry_from(__sk, sk_node)
0883 #define sk_nulls_for_each_from(__sk, node) \
0884 if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \
0885 hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node)
0886 #define sk_for_each_safe(__sk, tmp, list) \
0887 hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
0888 #define sk_for_each_bound(__sk, list) \
0889 hlist_for_each_entry(__sk, list, sk_bind_node)
0890
0891
0892
0893
0894
0895
0896
0897
0898
0899 #define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \
0900 for (pos = rcu_dereference(hlist_first_rcu(head)); \
0901 pos != NULL && \
0902 ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
0903 pos = rcu_dereference(hlist_next_rcu(pos)))
0904
0905 static inline struct user_namespace *sk_user_ns(const struct sock *sk)
0906 {
0907
0908
0909
0910
0911 return sk->sk_socket->file->f_cred->user_ns;
0912 }
0913
0914
0915 enum sock_flags {
0916 SOCK_DEAD,
0917 SOCK_DONE,
0918 SOCK_URGINLINE,
0919 SOCK_KEEPOPEN,
0920 SOCK_LINGER,
0921 SOCK_DESTROY,
0922 SOCK_BROADCAST,
0923 SOCK_TIMESTAMP,
0924 SOCK_ZAPPED,
0925 SOCK_USE_WRITE_QUEUE,
0926 SOCK_DBG,
0927 SOCK_RCVTSTAMP,
0928 SOCK_RCVTSTAMPNS,
0929 SOCK_LOCALROUTE,
0930 SOCK_MEMALLOC,
0931 SOCK_TIMESTAMPING_RX_SOFTWARE,
0932 SOCK_FASYNC,
0933 SOCK_RXQ_OVFL,
0934 SOCK_ZEROCOPY,
0935 SOCK_WIFI_STATUS,
0936 SOCK_NOFCS,
0937
0938
0939
0940 SOCK_FILTER_LOCKED,
0941 SOCK_SELECT_ERR_QUEUE,
0942 SOCK_RCU_FREE,
0943 SOCK_TXTIME,
0944 SOCK_XDP,
0945 SOCK_TSTAMP_NEW,
0946 SOCK_RCVMARK,
0947 };
0948
0949 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
0950
0951 static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk)
0952 {
0953 nsk->sk_flags = osk->sk_flags;
0954 }
0955
0956 static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
0957 {
0958 __set_bit(flag, &sk->sk_flags);
0959 }
0960
0961 static inline void sock_reset_flag(struct sock *sk, enum sock_flags flag)
0962 {
0963 __clear_bit(flag, &sk->sk_flags);
0964 }
0965
0966 static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
0967 int valbool)
0968 {
0969 if (valbool)
0970 sock_set_flag(sk, bit);
0971 else
0972 sock_reset_flag(sk, bit);
0973 }
0974
0975 static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
0976 {
0977 return test_bit(flag, &sk->sk_flags);
0978 }
0979
0980 #ifdef CONFIG_NET
0981 DECLARE_STATIC_KEY_FALSE(memalloc_socks_key);
0982 static inline int sk_memalloc_socks(void)
0983 {
0984 return static_branch_unlikely(&memalloc_socks_key);
0985 }
0986
0987 void __receive_sock(struct file *file);
0988 #else
0989
0990 static inline int sk_memalloc_socks(void)
0991 {
0992 return 0;
0993 }
0994
0995 static inline void __receive_sock(struct file *file)
0996 { }
0997 #endif
0998
0999 static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask)
1000 {
1001 return gfp_mask | (sk->sk_allocation & __GFP_MEMALLOC);
1002 }
1003
1004 static inline void sk_acceptq_removed(struct sock *sk)
1005 {
1006 WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog - 1);
1007 }
1008
1009 static inline void sk_acceptq_added(struct sock *sk)
1010 {
1011 WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1);
1012 }
1013
1014
1015
1016
1017
1018 static inline bool sk_acceptq_is_full(const struct sock *sk)
1019 {
1020 return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog);
1021 }
1022
1023
1024
1025
1026 static inline int sk_stream_min_wspace(const struct sock *sk)
1027 {
1028 return READ_ONCE(sk->sk_wmem_queued) >> 1;
1029 }
1030
1031 static inline int sk_stream_wspace(const struct sock *sk)
1032 {
1033 return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued);
1034 }
1035
1036 static inline void sk_wmem_queued_add(struct sock *sk, int val)
1037 {
1038 WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
1039 }
1040
1041 void sk_stream_write_space(struct sock *sk);
1042
1043
1044 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
1045 {
1046
1047 skb_dst_force(skb);
1048
1049 if (!sk->sk_backlog.tail)
1050 WRITE_ONCE(sk->sk_backlog.head, skb);
1051 else
1052 sk->sk_backlog.tail->next = skb;
1053
1054 WRITE_ONCE(sk->sk_backlog.tail, skb);
1055 skb->next = NULL;
1056 }
1057
1058
1059
1060
1061
1062
1063 static inline bool sk_rcvqueues_full(const struct sock *sk, unsigned int limit)
1064 {
1065 unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
1066
1067 return qsize > limit;
1068 }
1069
1070
1071 static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb,
1072 unsigned int limit)
1073 {
1074 if (sk_rcvqueues_full(sk, limit))
1075 return -ENOBUFS;
1076
1077
1078
1079
1080
1081
1082 if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
1083 return -ENOMEM;
1084
1085 __sk_add_backlog(sk, skb);
1086 sk->sk_backlog.len += skb->truesize;
1087 return 0;
1088 }
1089
1090 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
1091
1092 INDIRECT_CALLABLE_DECLARE(int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb));
1093 INDIRECT_CALLABLE_DECLARE(int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb));
1094
1095 static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
1096 {
1097 if (sk_memalloc_socks() && skb_pfmemalloc(skb))
1098 return __sk_backlog_rcv(sk, skb);
1099
1100 return INDIRECT_CALL_INET(sk->sk_backlog_rcv,
1101 tcp_v6_do_rcv,
1102 tcp_v4_do_rcv,
1103 sk, skb);
1104 }
1105
1106 static inline void sk_incoming_cpu_update(struct sock *sk)
1107 {
1108 int cpu = raw_smp_processor_id();
1109
1110 if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu))
1111 WRITE_ONCE(sk->sk_incoming_cpu, cpu);
1112 }
1113
1114 static inline void sock_rps_record_flow_hash(__u32 hash)
1115 {
1116 #ifdef CONFIG_RPS
1117 struct rps_sock_flow_table *sock_flow_table;
1118
1119 rcu_read_lock();
1120 sock_flow_table = rcu_dereference(rps_sock_flow_table);
1121 rps_record_sock_flow(sock_flow_table, hash);
1122 rcu_read_unlock();
1123 #endif
1124 }
1125
1126 static inline void sock_rps_record_flow(const struct sock *sk)
1127 {
1128 #ifdef CONFIG_RPS
1129 if (static_branch_unlikely(&rfs_needed)) {
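/*
 * Reading sk_rxhash may cost a cache-line miss, so only do it for
 * TCP_ESTABLISHED sockets: that state covers essentially all cases where
 * recording the flow is useful, and sk_state is a cheap check.
 */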
1140 if (sk->sk_state == TCP_ESTABLISHED)
1141 sock_rps_record_flow_hash(sk->sk_rxhash);
1142 }
1143 #endif
1144 }
1145
1146 static inline void sock_rps_save_rxhash(struct sock *sk,
1147 const struct sk_buff *skb)
1148 {
1149 #ifdef CONFIG_RPS
1150 if (unlikely(sk->sk_rxhash != skb->hash))
1151 sk->sk_rxhash = skb->hash;
1152 #endif
1153 }
1154
1155 static inline void sock_rps_reset_rxhash(struct sock *sk)
1156 {
1157 #ifdef CONFIG_RPS
1158 sk->sk_rxhash = 0;
1159 #endif
1160 }
1161
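/*
 * sk_wait_event - sleep until @__condition is true or *(@__timeo) expires.
 * Called with the socket lock held; the lock is dropped around the sleep
 * (wait_woken()) and re-taken before the condition is re-evaluated.  A
 * hedged, illustrative usage sketch modelled on callers such as
 * sk_stream_wait_connect(); ready() and noblock stand in for the caller's
 * own condition and flags:
 *
 *	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 *	long timeo = sock_rcvtimeo(sk, noblock);
 *	int done = 0;
 *
 *	add_wait_queue(sk_sleep(sk), &wait);
 *	while (!done && timeo)
 *		done = sk_wait_event(sk, &timeo, ready(sk), &wait);
 *	remove_wait_queue(sk_sleep(sk), &wait);
 */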
1162 #define sk_wait_event(__sk, __timeo, __condition, __wait) \
1163 ({ int __rc; \
1164 release_sock(__sk); \
1165 __rc = __condition; \
1166 if (!__rc) { \
1167 *(__timeo) = wait_woken(__wait, \
1168 TASK_INTERRUPTIBLE, \
1169 *(__timeo)); \
1170 } \
1171 sched_annotate_sleep(); \
1172 lock_sock(__sk); \
1173 __rc = __condition; \
1174 __rc; \
1175 })
1176
1177 int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
1178 int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
1179 void sk_stream_wait_close(struct sock *sk, long timeo_p);
1180 int sk_stream_error(struct sock *sk, int flags, int err);
1181 void sk_stream_kill_queues(struct sock *sk);
1182 void sk_set_memalloc(struct sock *sk);
1183 void sk_clear_memalloc(struct sock *sk);
1184
1185 void __sk_flush_backlog(struct sock *sk);
1186
1187 static inline bool sk_flush_backlog(struct sock *sk)
1188 {
1189 if (unlikely(READ_ONCE(sk->sk_backlog.tail))) {
1190 __sk_flush_backlog(sk);
1191 return true;
1192 }
1193 return false;
1194 }
1195
1196 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb);
1197
1198 struct request_sock_ops;
1199 struct timewait_sock_ops;
1200 struct inet_hashinfo;
1201 struct raw_hashinfo;
1202 struct smc_hashinfo;
1203 struct module;
1204 struct sk_psock;
1205
1206
1207
1208
1209
1210 static inline void sk_prot_clear_nulls(struct sock *sk, int size)
1211 {
1212 if (offsetof(struct sock, sk_node.next) != 0)
1213 memset(sk, 0, offsetof(struct sock, sk_node.next));
1214 memset(&sk->sk_node.pprev, 0,
1215 size - offsetof(struct sock, sk_node.pprev));
1216 }
1217
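/*
 * struct proto - transport-protocol hooks attached to sockets (the socket
 * layer to transport layer interface).  Each protocol fills in the
 * operations it supports plus its memory-accounting knobs, slab cache
 * parameters and hash tables, and registers itself with proto_register().
 */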
1221 struct proto {
1222 void (*close)(struct sock *sk,
1223 long timeout);
1224 int (*pre_connect)(struct sock *sk,
1225 struct sockaddr *uaddr,
1226 int addr_len);
1227 int (*connect)(struct sock *sk,
1228 struct sockaddr *uaddr,
1229 int addr_len);
1230 int (*disconnect)(struct sock *sk, int flags);
1231
1232 struct sock * (*accept)(struct sock *sk, int flags, int *err,
1233 bool kern);
1234
1235 int (*ioctl)(struct sock *sk, int cmd,
1236 unsigned long arg);
1237 int (*init)(struct sock *sk);
1238 void (*destroy)(struct sock *sk);
1239 void (*shutdown)(struct sock *sk, int how);
1240 int (*setsockopt)(struct sock *sk, int level,
1241 int optname, sockptr_t optval,
1242 unsigned int optlen);
1243 int (*getsockopt)(struct sock *sk, int level,
1244 int optname, char __user *optval,
1245 int __user *option);
1246 void (*keepalive)(struct sock *sk, int valbool);
1247 #ifdef CONFIG_COMPAT
1248 int (*compat_ioctl)(struct sock *sk,
1249 unsigned int cmd, unsigned long arg);
1250 #endif
1251 int (*sendmsg)(struct sock *sk, struct msghdr *msg,
1252 size_t len);
1253 int (*recvmsg)(struct sock *sk, struct msghdr *msg,
1254 size_t len, int flags, int *addr_len);
1255 int (*sendpage)(struct sock *sk, struct page *page,
1256 int offset, size_t size, int flags);
1257 int (*bind)(struct sock *sk,
1258 struct sockaddr *addr, int addr_len);
1259 int (*bind_add)(struct sock *sk,
1260 struct sockaddr *addr, int addr_len);
1261
1262 int (*backlog_rcv) (struct sock *sk,
1263 struct sk_buff *skb);
1264 bool (*bpf_bypass_getsockopt)(int level,
1265 int optname);
1266
1267 void (*release_cb)(struct sock *sk);
1268
1269
1270 int (*hash)(struct sock *sk);
1271 void (*unhash)(struct sock *sk);
1272 void (*rehash)(struct sock *sk);
1273 int (*get_port)(struct sock *sk, unsigned short snum);
1274 void (*put_port)(struct sock *sk);
1275 #ifdef CONFIG_BPF_SYSCALL
1276 int (*psock_update_sk_prot)(struct sock *sk,
1277 struct sk_psock *psock,
1278 bool restore);
1279 #endif
1280
1281
1282 #ifdef CONFIG_PROC_FS
1283 unsigned int inuse_idx;
1284 #endif
1285
1286 #if IS_ENABLED(CONFIG_MPTCP)
1287 int (*forward_alloc_get)(const struct sock *sk);
1288 #endif
1289
1290 bool (*stream_memory_free)(const struct sock *sk, int wake);
1291 bool (*sock_is_readable)(struct sock *sk);
1292
1293 void (*enter_memory_pressure)(struct sock *sk);
1294 void (*leave_memory_pressure)(struct sock *sk);
1295 atomic_long_t *memory_allocated;
1296 int __percpu *per_cpu_fw_alloc;
1297 struct percpu_counter *sockets_allocated;
1298
1299
1300
1301
1302
1303
1304
1305 unsigned long *memory_pressure;
1306 long *sysctl_mem;
1307
1308 int *sysctl_wmem;
1309 int *sysctl_rmem;
1310 u32 sysctl_wmem_offset;
1311 u32 sysctl_rmem_offset;
1312
1313 int max_header;
1314 bool no_autobind;
1315
1316 struct kmem_cache *slab;
1317 unsigned int obj_size;
1318 slab_flags_t slab_flags;
1319 unsigned int useroffset;
1320 unsigned int usersize;
1321
1322 unsigned int __percpu *orphan_count;
1323
1324 struct request_sock_ops *rsk_prot;
1325 struct timewait_sock_ops *twsk_prot;
1326
1327 union {
1328 struct inet_hashinfo *hashinfo;
1329 struct udp_table *udp_table;
1330 struct raw_hashinfo *raw_hash;
1331 struct smc_hashinfo *smc_hash;
1332 } h;
1333
1334 struct module *owner;
1335
1336 char name[32];
1337
1338 struct list_head node;
1339 #ifdef SOCK_REFCNT_DEBUG
1340 atomic_t socks;
1341 #endif
1342 int (*diag_destroy)(struct sock *sk, int err);
1343 } __randomize_layout;
1344
1345 int proto_register(struct proto *prot, int alloc_slab);
1346 void proto_unregister(struct proto *prot);
1347 int sock_load_diag_module(int family, int protocol);
1348
1349 #ifdef SOCK_REFCNT_DEBUG
1350 static inline void sk_refcnt_debug_inc(struct sock *sk)
1351 {
1352 atomic_inc(&sk->sk_prot->socks);
1353 }
1354
1355 static inline void sk_refcnt_debug_dec(struct sock *sk)
1356 {
1357 atomic_dec(&sk->sk_prot->socks);
1358 printk(KERN_DEBUG "%s socket %p released, %d are still alive\n",
1359 sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
1360 }
1361
1362 static inline void sk_refcnt_debug_release(const struct sock *sk)
1363 {
1364 if (refcount_read(&sk->sk_refcnt) != 1)
1365 printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
1366 sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt));
1367 }
1368 #else
1369 #define sk_refcnt_debug_inc(sk) do { } while (0)
1370 #define sk_refcnt_debug_dec(sk) do { } while (0)
1371 #define sk_refcnt_debug_release(sk) do { } while (0)
1372 #endif
1373
1374 INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake));
1375
1376 static inline int sk_forward_alloc_get(const struct sock *sk)
1377 {
1378 #if IS_ENABLED(CONFIG_MPTCP)
1379 if (sk->sk_prot->forward_alloc_get)
1380 return sk->sk_prot->forward_alloc_get(sk);
1381 #endif
1382 return sk->sk_forward_alloc;
1383 }
1384
1385 static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
1386 {
1387 if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
1388 return false;
1389
1390 return sk->sk_prot->stream_memory_free ?
1391 INDIRECT_CALL_INET_1(sk->sk_prot->stream_memory_free,
1392 tcp_stream_memory_free, sk, wake) : true;
1393 }
1394
1395 static inline bool sk_stream_memory_free(const struct sock *sk)
1396 {
1397 return __sk_stream_memory_free(sk, 0);
1398 }
1399
1400 static inline bool __sk_stream_is_writeable(const struct sock *sk, int wake)
1401 {
1402 return sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) &&
1403 __sk_stream_memory_free(sk, wake);
1404 }
1405
1406 static inline bool sk_stream_is_writeable(const struct sock *sk)
1407 {
1408 return __sk_stream_is_writeable(sk, 0);
1409 }
1410
1411 static inline int sk_under_cgroup_hierarchy(struct sock *sk,
1412 struct cgroup *ancestor)
1413 {
1414 #ifdef CONFIG_SOCK_CGROUP_DATA
1415 return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data),
1416 ancestor);
1417 #else
1418 return -ENOTSUPP;
1419 #endif
1420 }
1421
1422 static inline bool sk_has_memory_pressure(const struct sock *sk)
1423 {
1424 return sk->sk_prot->memory_pressure != NULL;
1425 }
1426
1427 static inline bool sk_under_memory_pressure(const struct sock *sk)
1428 {
1429 if (!sk->sk_prot->memory_pressure)
1430 return false;
1431
1432 if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
1433 mem_cgroup_under_socket_pressure(sk->sk_memcg))
1434 return true;
1435
1436 return !!*sk->sk_prot->memory_pressure;
1437 }
1438
1439 static inline long
1440 proto_memory_allocated(const struct proto *prot)
1441 {
1442 return max(0L, atomic_long_read(prot->memory_allocated));
1443 }
1444
1445 static inline long
1446 sk_memory_allocated(const struct sock *sk)
1447 {
1448 return proto_memory_allocated(sk->sk_prot);
1449 }
1450
1451
1452 #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
1453
1454 static inline void
1455 sk_memory_allocated_add(struct sock *sk, int amt)
1456 {
1457 int local_reserve;
1458
1459 preempt_disable();
1460 local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
1461 if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
1462 __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
1463 atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
1464 }
1465 preempt_enable();
1466 }
1467
1468 static inline void
1469 sk_memory_allocated_sub(struct sock *sk, int amt)
1470 {
1471 int local_reserve;
1472
1473 preempt_disable();
1474 local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
1475 if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
1476 __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
1477 atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
1478 }
1479 preempt_enable();
1480 }
1481
1482 #define SK_ALLOC_PERCPU_COUNTER_BATCH 16
1483
1484 static inline void sk_sockets_allocated_dec(struct sock *sk)
1485 {
1486 percpu_counter_add_batch(sk->sk_prot->sockets_allocated, -1,
1487 SK_ALLOC_PERCPU_COUNTER_BATCH);
1488 }
1489
1490 static inline void sk_sockets_allocated_inc(struct sock *sk)
1491 {
1492 percpu_counter_add_batch(sk->sk_prot->sockets_allocated, 1,
1493 SK_ALLOC_PERCPU_COUNTER_BATCH);
1494 }
1495
1496 static inline u64
1497 sk_sockets_allocated_read_positive(struct sock *sk)
1498 {
1499 return percpu_counter_read_positive(sk->sk_prot->sockets_allocated);
1500 }
1501
1502 static inline int
1503 proto_sockets_allocated_sum_positive(struct proto *prot)
1504 {
1505 return percpu_counter_sum_positive(prot->sockets_allocated);
1506 }
1507
1508 static inline bool
1509 proto_memory_pressure(struct proto *prot)
1510 {
1511 if (!prot->memory_pressure)
1512 return false;
1513 return !!*prot->memory_pressure;
1514 }
1515
1516
1517 #ifdef CONFIG_PROC_FS
1518 #define PROTO_INUSE_NR 64
1519 struct prot_inuse {
1520 int all;
1521 int val[PROTO_INUSE_NR];
1522 };
1523
1524 static inline void sock_prot_inuse_add(const struct net *net,
1525 const struct proto *prot, int val)
1526 {
1527 this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
1528 }
1529
1530 static inline void sock_inuse_add(const struct net *net, int val)
1531 {
1532 this_cpu_add(net->core.prot_inuse->all, val);
1533 }
1534
1535 int sock_prot_inuse_get(struct net *net, struct proto *proto);
1536 int sock_inuse_get(struct net *net);
1537 #else
1538 static inline void sock_prot_inuse_add(const struct net *net,
1539 const struct proto *prot, int val)
1540 {
1541 }
1542
1543 static inline void sock_inuse_add(const struct net *net, int val)
1544 {
1545 }
1546 #endif
1547
1548
1549
1550
1551
1552 static inline int __sk_prot_rehash(struct sock *sk)
1553 {
1554 sk->sk_prot->unhash(sk);
1555 return sk->sk_prot->hash(sk);
1556 }
1557
1558
1559 #define SOCK_DESTROY_TIME (10*HZ)
1560
1561
1562 #define PROT_SOCK 1024
1563
1564 #define SHUTDOWN_MASK 3
1565 #define RCV_SHUTDOWN 1
1566 #define SEND_SHUTDOWN 2
1567
1568 #define SOCK_BINDADDR_LOCK 4
1569 #define SOCK_BINDPORT_LOCK 8
1570
1571 struct socket_alloc {
1572 struct socket socket;
1573 struct inode vfs_inode;
1574 };
1575
1576 static inline struct socket *SOCKET_I(struct inode *inode)
1577 {
1578 return &container_of(inode, struct socket_alloc, vfs_inode)->socket;
1579 }
1580
1581 static inline struct inode *SOCK_INODE(struct socket *socket)
1582 {
1583 return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
1584 }
1585
1586
1587
1588
1589 int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind);
1590 int __sk_mem_schedule(struct sock *sk, int size, int kind);
1591 void __sk_mem_reduce_allocated(struct sock *sk, int amount);
1592 void __sk_mem_reclaim(struct sock *sk, int amount);
1593
1594 #define SK_MEM_SEND 0
1595 #define SK_MEM_RECV 1
1596
1597
1598 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
1599 {
1600 return READ_ONCE(sk->sk_prot->sysctl_mem[index]);
1601 }
1602
1603 static inline int sk_mem_pages(int amt)
1604 {
1605 return (amt + PAGE_SIZE - 1) >> PAGE_SHIFT;
1606 }
1607
1608 static inline bool sk_has_account(struct sock *sk)
1609 {
1610
1611 return !!sk->sk_prot->memory_allocated;
1612 }
1613
1614 static inline bool sk_wmem_schedule(struct sock *sk, int size)
1615 {
1616 int delta;
1617
1618 if (!sk_has_account(sk))
1619 return true;
1620 delta = size - sk->sk_forward_alloc;
1621 return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND);
1622 }
1623
1624 static inline bool
1625 sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
1626 {
1627 int delta;
1628
1629 if (!sk_has_account(sk))
1630 return true;
1631 delta = size - sk->sk_forward_alloc;
1632 return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
1633 skb_pfmemalloc(skb);
1634 }
1635
1636 static inline int sk_unused_reserved_mem(const struct sock *sk)
1637 {
1638 int unused_mem;
1639
1640 if (likely(!sk->sk_reserved_mem))
1641 return 0;
1642
1643 unused_mem = sk->sk_reserved_mem - sk->sk_wmem_queued -
1644 atomic_read(&sk->sk_rmem_alloc);
1645
1646 return unused_mem > 0 ? unused_mem : 0;
1647 }
1648
1649 static inline void sk_mem_reclaim(struct sock *sk)
1650 {
1651 int reclaimable;
1652
1653 if (!sk_has_account(sk))
1654 return;
1655
1656 reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
1657
1658 if (reclaimable >= (int)PAGE_SIZE)
1659 __sk_mem_reclaim(sk, reclaimable);
1660 }
1661
1662 static inline void sk_mem_reclaim_final(struct sock *sk)
1663 {
1664 sk->sk_reserved_mem = 0;
1665 sk_mem_reclaim(sk);
1666 }
1667
1668 static inline void sk_mem_charge(struct sock *sk, int size)
1669 {
1670 if (!sk_has_account(sk))
1671 return;
1672 sk->sk_forward_alloc -= size;
1673 }
1674
1675 static inline void sk_mem_uncharge(struct sock *sk, int size)
1676 {
1677 if (!sk_has_account(sk))
1678 return;
1679 sk->sk_forward_alloc += size;
1680 sk_mem_reclaim(sk);
1681 }
1682
1683
1684
1685
1686
1687
1688
1689
1690 #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \
1691 do { \
1692 sk->sk_lock.owned = 0; \
1693 init_waitqueue_head(&sk->sk_lock.wq); \
1694 spin_lock_init(&(sk)->sk_lock.slock); \
1695 debug_check_no_locks_freed((void *)&(sk)->sk_lock, \
1696 sizeof((sk)->sk_lock)); \
1697 lockdep_set_class_and_name(&(sk)->sk_lock.slock, \
1698 (skey), (sname)); \
1699 lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \
1700 } while (0)
1701
1702 static inline bool lockdep_sock_is_held(const struct sock *sk)
1703 {
1704 return lockdep_is_held(&sk->sk_lock) ||
1705 lockdep_is_held(&sk->sk_lock.slock);
1706 }
1707
1708 void lock_sock_nested(struct sock *sk, int subclass);
1709
1710 static inline void lock_sock(struct sock *sk)
1711 {
1712 lock_sock_nested(sk, 0);
1713 }
1714
1715 void __lock_sock(struct sock *sk);
1716 void __release_sock(struct sock *sk);
1717 void release_sock(struct sock *sk);
1718
1719
1720 #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock))
1721 #define bh_lock_sock_nested(__sk) \
1722 spin_lock_nested(&((__sk)->sk_lock.slock), \
1723 SINGLE_DEPTH_NESTING)
1724 #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock))
1725
1726 bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock);
1727
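/*
 * lock_sock_fast - fast version of lock_sock(), meant for short sections
 * where the process will not block.  Returns false when the fast path is
 * taken (slock held, owned == 0, BHs disabled) and true when the slow path
 * was needed (socket owned, BHs enabled); pass the result to
 * unlock_sock_fast() as @slow.
 */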
1741 static inline bool lock_sock_fast(struct sock *sk)
1742 {
1743
1744 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
1745
1746 return __lock_sock_fast(sk);
1747 }
1748
1749
1750 static inline bool lock_sock_fast_nested(struct sock *sk)
1751 {
1752 mutex_acquire(&sk->sk_lock.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
1753
1754 return __lock_sock_fast(sk);
1755 }
1756
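/*
 * unlock_sock_fast - counterpart of lock_sock_fast().  @slow must be the
 * value lock_sock_fast() returned: the slow path ends with release_sock(),
 * the fast path just drops the spinlock and re-enables BHs.
 */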
1765 static inline void unlock_sock_fast(struct sock *sk, bool slow)
1766 __releases(&sk->sk_lock.slock)
1767 {
1768 if (slow) {
1769 release_sock(sk);
1770 __release(&sk->sk_lock.slock);
1771 } else {
1772 mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
1773 spin_unlock_bh(&sk->sk_lock.slock);
1774 }
1775 }
1776
1777
1778
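/*
 * Lock-ownership helpers.  User-context code "owns" a socket via
 * lock_sock(); while owned, softirq processing queues incoming packets on
 * the backlog, which the owner drains when it calls release_sock().
 * sock_owned_by_me() is a lockdep assertion that the current context holds
 * the socket lock in either form.
 */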
1791 static inline void sock_owned_by_me(const struct sock *sk)
1792 {
1793 #ifdef CONFIG_LOCKDEP
1794 WARN_ON_ONCE(!lockdep_sock_is_held(sk) && debug_locks);
1795 #endif
1796 }
1797
1798 static inline bool sock_owned_by_user(const struct sock *sk)
1799 {
1800 sock_owned_by_me(sk);
1801 return sk->sk_lock.owned;
1802 }
1803
1804 static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
1805 {
1806 return sk->sk_lock.owned;
1807 }
1808
1809 static inline void sock_release_ownership(struct sock *sk)
1810 {
1811 if (sock_owned_by_user_nocheck(sk)) {
1812 sk->sk_lock.owned = 0;
1813
1814
1815 mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
1816 }
1817 }
1818
1819
1820 static inline bool sock_allow_reclassification(const struct sock *csk)
1821 {
1822 struct sock *sk = (struct sock *)csk;
1823
1824 return !sock_owned_by_user_nocheck(sk) &&
1825 !spin_is_locked(&sk->sk_lock.slock);
1826 }
1827
1828 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1829 struct proto *prot, int kern);
1830 void sk_free(struct sock *sk);
1831 void sk_destruct(struct sock *sk);
1832 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
1833 void sk_free_unlock_clone(struct sock *sk);
1834
1835 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1836 gfp_t priority);
1837 void __sock_wfree(struct sk_buff *skb);
1838 void sock_wfree(struct sk_buff *skb);
1839 struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
1840 gfp_t priority);
1841 void skb_orphan_partial(struct sk_buff *skb);
1842 void sock_rfree(struct sk_buff *skb);
1843 void sock_efree(struct sk_buff *skb);
1844 #ifdef CONFIG_INET
1845 void sock_edemux(struct sk_buff *skb);
1846 void sock_pfree(struct sk_buff *skb);
1847 #else
1848 #define sock_edemux sock_efree
1849 #endif
1850
1851 int sock_setsockopt(struct socket *sock, int level, int op,
1852 sockptr_t optval, unsigned int optlen);
1853
1854 int sock_getsockopt(struct socket *sock, int level, int op,
1855 char __user *optval, int __user *optlen);
1856 int sock_gettstamp(struct socket *sock, void __user *userstamp,
1857 bool timeval, bool time32);
1858 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1859 unsigned long data_len, int noblock,
1860 int *errcode, int max_page_order);
1861
1862 static inline struct sk_buff *sock_alloc_send_skb(struct sock *sk,
1863 unsigned long size,
1864 int noblock, int *errcode)
1865 {
1866 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
1867 }
1868
1869 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority);
1870 void sock_kfree_s(struct sock *sk, void *mem, int size);
1871 void sock_kzfree_s(struct sock *sk, void *mem, int size);
1872 void sk_send_sigurg(struct sock *sk);
1873
1874 struct sockcm_cookie {
1875 u64 transmit_time;
1876 u32 mark;
1877 u16 tsflags;
1878 };
1879
1880 static inline void sockcm_init(struct sockcm_cookie *sockc,
1881 const struct sock *sk)
1882 {
1883 *sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
1884 }
1885
1886 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
1887 struct sockcm_cookie *sockc);
1888 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
1889 struct sockcm_cookie *sockc);
1890
1891
1892
1893
1894
1895 int sock_no_bind(struct socket *, struct sockaddr *, int);
1896 int sock_no_connect(struct socket *, struct sockaddr *, int, int);
1897 int sock_no_socketpair(struct socket *, struct socket *);
1898 int sock_no_accept(struct socket *, struct socket *, int, bool);
1899 int sock_no_getname(struct socket *, struct sockaddr *, int);
1900 int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
1901 int sock_no_listen(struct socket *, int);
1902 int sock_no_shutdown(struct socket *, int);
1903 int sock_no_sendmsg(struct socket *, struct msghdr *, size_t);
1904 int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len);
1905 int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int);
1906 int sock_no_mmap(struct file *file, struct socket *sock,
1907 struct vm_area_struct *vma);
1908 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
1909 size_t size, int flags);
1910 ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
1911 int offset, size_t size, int flags);
1912
1913
1914
1915
1916
1917 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1918 char __user *optval, int __user *optlen);
1919 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
1920 int flags);
1921 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1922 sockptr_t optval, unsigned int optlen);
1923
1924 void sk_common_release(struct sock *sk);
1925
1926
1927
1928
1929
1930
1931 void sock_init_data(struct socket *sock, struct sock *sk);
1932
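/*
 * Socket reference counting: every external access path to a socket (hash
 * table entry, list membership, running timer, skb in flight) should hold a
 * reference.  Once sk_refcnt drops to zero nothing can see the socket any
 * more, and sock_put() frees it via sk_free().
 */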
1959 static inline void sock_put(struct sock *sk)
1960 {
1961 if (refcount_dec_and_test(&sk->sk_refcnt))
1962 sk_free(sk);
1963 }
1964
1965
1966
1967 void sock_gen_put(struct sock *sk);
1968
1969 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested,
1970 unsigned int trim_cap, bool refcounted);
1971 static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
1972 const int nested)
1973 {
1974 return __sk_receive_skb(sk, skb, nested, 1, true);
1975 }
1976
1977 static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
1978 {
1979
1980 if (WARN_ON_ONCE((unsigned short)tx_queue >= USHRT_MAX))
1981 return;
1982 sk->sk_tx_queue_mapping = tx_queue;
1983 }
1984
1985 #define NO_QUEUE_MAPPING USHRT_MAX
1986
1987 static inline void sk_tx_queue_clear(struct sock *sk)
1988 {
1989 sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING;
1990 }
1991
1992 static inline int sk_tx_queue_get(const struct sock *sk)
1993 {
1994 if (sk && sk->sk_tx_queue_mapping != NO_QUEUE_MAPPING)
1995 return sk->sk_tx_queue_mapping;
1996
1997 return -1;
1998 }
1999
2000 static inline void __sk_rx_queue_set(struct sock *sk,
2001 const struct sk_buff *skb,
2002 bool force_set)
2003 {
2004 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
2005 if (skb_rx_queue_recorded(skb)) {
2006 u16 rx_queue = skb_get_rx_queue(skb);
2007
2008 if (force_set ||
2009 unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue))
2010 WRITE_ONCE(sk->sk_rx_queue_mapping, rx_queue);
2011 }
2012 #endif
2013 }
2014
2015 static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
2016 {
2017 __sk_rx_queue_set(sk, skb, true);
2018 }
2019
2020 static inline void sk_rx_queue_update(struct sock *sk, const struct sk_buff *skb)
2021 {
2022 __sk_rx_queue_set(sk, skb, false);
2023 }
2024
2025 static inline void sk_rx_queue_clear(struct sock *sk)
2026 {
2027 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
2028 WRITE_ONCE(sk->sk_rx_queue_mapping, NO_QUEUE_MAPPING);
2029 #endif
2030 }
2031
2032 static inline int sk_rx_queue_get(const struct sock *sk)
2033 {
2034 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
2035 if (sk) {
2036 int res = READ_ONCE(sk->sk_rx_queue_mapping);
2037
2038 if (res != NO_QUEUE_MAPPING)
2039 return res;
2040 }
2041 #endif
2042
2043 return -1;
2044 }
2045
2046 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
2047 {
2048 sk->sk_socket = sock;
2049 }
2050
2051 static inline wait_queue_head_t *sk_sleep(struct sock *sk)
2052 {
2053 BUILD_BUG_ON(offsetof(struct socket_wq, wait) != 0);
2054 return &rcu_dereference_raw(sk->sk_wq)->wait;
2055 }
2056
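/*
 * sock_orphan - detach a socket from its owning struct socket / process
 * context: mark it SOCK_DEAD and clear sk_socket and sk_wq under
 * sk_callback_lock.  sock_graft() below does the reverse, tying a freshly
 * created or accepted sock to its parent socket.
 */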
2063 static inline void sock_orphan(struct sock *sk)
2064 {
2065 write_lock_bh(&sk->sk_callback_lock);
2066 sock_set_flag(sk, SOCK_DEAD);
2067 sk_set_socket(sk, NULL);
2068 sk->sk_wq = NULL;
2069 write_unlock_bh(&sk->sk_callback_lock);
2070 }
2071
2072 static inline void sock_graft(struct sock *sk, struct socket *parent)
2073 {
2074 WARN_ON(parent->sk);
2075 write_lock_bh(&sk->sk_callback_lock);
2076 rcu_assign_pointer(sk->sk_wq, &parent->wq);
2077 parent->sk = sk;
2078 sk_set_socket(sk, parent);
2079 sk->sk_uid = SOCK_INODE(parent)->i_uid;
2080 security_sock_graft(sk, parent);
2081 write_unlock_bh(&sk->sk_callback_lock);
2082 }
2083
2084 kuid_t sock_i_uid(struct sock *sk);
2085 unsigned long sock_i_ino(struct sock *sk);
2086
2087 static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
2088 {
2089 return sk ? sk->sk_uid : make_kuid(net->user_ns, 0);
2090 }
2091
2092 static inline u32 net_tx_rndhash(void)
2093 {
2094 u32 v = prandom_u32();
2095
2096 return v ?: 1;
2097 }
2098
2099 static inline void sk_set_txhash(struct sock *sk)
2100 {
2101
2102 WRITE_ONCE(sk->sk_txhash, net_tx_rndhash());
2103 }
2104
2105 static inline bool sk_rethink_txhash(struct sock *sk)
2106 {
2107 if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) {
2108 sk_set_txhash(sk);
2109 return true;
2110 }
2111 return false;
2112 }
2113
2114 static inline struct dst_entry *
2115 __sk_dst_get(struct sock *sk)
2116 {
2117 return rcu_dereference_check(sk->sk_dst_cache,
2118 lockdep_sock_is_held(sk));
2119 }
2120
2121 static inline struct dst_entry *
2122 sk_dst_get(struct sock *sk)
2123 {
2124 struct dst_entry *dst;
2125
2126 rcu_read_lock();
2127 dst = rcu_dereference(sk->sk_dst_cache);
2128 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
2129 dst = NULL;
2130 rcu_read_unlock();
2131 return dst;
2132 }
2133
2134 static inline void __dst_negative_advice(struct sock *sk)
2135 {
2136 struct dst_entry *ndst, *dst = __sk_dst_get(sk);
2137
2138 if (dst && dst->ops->negative_advice) {
2139 ndst = dst->ops->negative_advice(dst);
2140
2141 if (ndst != dst) {
2142 rcu_assign_pointer(sk->sk_dst_cache, ndst);
2143 sk_tx_queue_clear(sk);
2144 sk->sk_dst_pending_confirm = 0;
2145 }
2146 }
2147 }
2148
2149 static inline void dst_negative_advice(struct sock *sk)
2150 {
2151 sk_rethink_txhash(sk);
2152 __dst_negative_advice(sk);
2153 }
2154
2155 static inline void
2156 __sk_dst_set(struct sock *sk, struct dst_entry *dst)
2157 {
2158 struct dst_entry *old_dst;
2159
2160 sk_tx_queue_clear(sk);
2161 sk->sk_dst_pending_confirm = 0;
2162 old_dst = rcu_dereference_protected(sk->sk_dst_cache,
2163 lockdep_sock_is_held(sk));
2164 rcu_assign_pointer(sk->sk_dst_cache, dst);
2165 dst_release(old_dst);
2166 }
2167
2168 static inline void
2169 sk_dst_set(struct sock *sk, struct dst_entry *dst)
2170 {
2171 struct dst_entry *old_dst;
2172
2173 sk_tx_queue_clear(sk);
2174 sk->sk_dst_pending_confirm = 0;
2175 old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);
2176 dst_release(old_dst);
2177 }
2178
2179 static inline void
2180 __sk_dst_reset(struct sock *sk)
2181 {
2182 __sk_dst_set(sk, NULL);
2183 }
2184
2185 static inline void
2186 sk_dst_reset(struct sock *sk)
2187 {
2188 sk_dst_set(sk, NULL);
2189 }
2190
2191 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
2192
2193 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
2194
2195 static inline void sk_dst_confirm(struct sock *sk)
2196 {
2197 if (!READ_ONCE(sk->sk_dst_pending_confirm))
2198 WRITE_ONCE(sk->sk_dst_pending_confirm, 1);
2199 }
2200
2201 static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
2202 {
2203 if (skb_get_dst_pending_confirm(skb)) {
2204 struct sock *sk = skb->sk;
2205
2206 if (sk && READ_ONCE(sk->sk_dst_pending_confirm))
2207 WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
2208 neigh_confirm(n);
2209 }
2210 }
2211
2212 bool sk_mc_loop(struct sock *sk);
2213
2214 static inline bool sk_can_gso(const struct sock *sk)
2215 {
2216 return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
2217 }
2218
2219 void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
2220
2221 static inline void sk_gso_disable(struct sock *sk)
2222 {
2223 sk->sk_gso_disabled = 1;
2224 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
2225 }
2226
2227 static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
2228 struct iov_iter *from, char *to,
2229 int copy, int offset)
2230 {
2231 if (skb->ip_summed == CHECKSUM_NONE) {
2232 __wsum csum = 0;
2233 if (!csum_and_copy_from_iter_full(to, copy, &csum, from))
2234 return -EFAULT;
2235 skb->csum = csum_block_add(skb->csum, csum, offset);
2236 } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
2237 if (!copy_from_iter_full_nocache(to, copy, from))
2238 return -EFAULT;
2239 } else if (!copy_from_iter_full(to, copy, from))
2240 return -EFAULT;
2241
2242 return 0;
2243 }
2244
2245 static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
2246 struct iov_iter *from, int copy)
2247 {
2248 int err, offset = skb->len;
2249
2250 err = skb_do_copy_data_nocache(sk, skb, from, skb_put(skb, copy),
2251 copy, offset);
2252 if (err)
2253 __skb_trim(skb, offset);
2254
2255 return err;
2256 }
2257
2258 static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from,
2259 struct sk_buff *skb,
2260 struct page *page,
2261 int off, int copy)
2262 {
2263 int err;
2264
2265 err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + off,
2266 copy, skb->len);
2267 if (err)
2268 return err;
2269
2270 skb_len_add(skb, copy);
2271 sk_wmem_queued_add(sk, copy);
2272 sk_mem_charge(sk, copy);
2273 return 0;
2274 }
2275
2276
2277
2278
2279
2280
2281
2282 static inline int sk_wmem_alloc_get(const struct sock *sk)
2283 {
2284 return refcount_read(&sk->sk_wmem_alloc) - 1;
2285 }
2286
2287
2288
2289
2290
2291
2292
2293 static inline int sk_rmem_alloc_get(const struct sock *sk)
2294 {
2295 return atomic_read(&sk->sk_rmem_alloc);
2296 }
2297
2298
2299
2300
2301
2302
2303
2304 static inline bool sk_has_allocations(const struct sock *sk)
2305 {
2306 return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk);
2307 }
2308
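/*
 * skwq_has_sleeper - check whether any process is waiting on @wq.
 *
 * Together with sock_poll_wait() this wraps the memory barrier needed to
 * avoid the race between a poller adding itself to the wait queue and the
 * receive path testing for sleepers: the barrier in wq_has_sleeper() pairs
 * with the one issued after poll_wait() in sock_poll_wait(), so either the
 * sleeper sees the new data or the waker sees the sleeper.
 */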
2340 static inline bool skwq_has_sleeper(struct socket_wq *wq)
2341 {
2342 return wq && wq_has_sleeper(&wq->wait);
2343 }
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353 static inline void sock_poll_wait(struct file *filp, struct socket *sock,
2354 poll_table *p)
2355 {
2356 if (!poll_does_not_wait(p)) {
2357 poll_wait(filp, &sock->wq.wait, p);
2358
2359
2360
2361
2362
2363 smp_mb();
2364 }
2365 }
2366
2367 static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
2368 {
2369
2370 u32 txhash = READ_ONCE(sk->sk_txhash);
2371
2372 if (txhash) {
2373 skb->l4_hash = 1;
2374 skb->hash = txhash;
2375 }
2376 }
2377
2378 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk);
2379
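/*
 * skb_set_owner_r - make @sk the owner of a received skb: orphan any
 * previous owner, set sock_rfree() as the destructor, and charge
 * skb->truesize to the socket's receive accounting (sk_rmem_alloc plus
 * sk_mem_charge()); the destructor undoes the accounting on free.
 */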
2388 static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
2389 {
2390 skb_orphan(skb);
2391 skb->sk = sk;
2392 skb->destructor = sock_rfree;
2393 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
2394 sk_mem_charge(sk, skb->truesize);
2395 }
2396
2397 static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk)
2398 {
2399 if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) {
2400 skb_orphan(skb);
2401 skb->destructor = sock_efree;
2402 skb->sk = sk;
2403 return true;
2404 }
2405 return false;
2406 }
2407
2408 static inline void skb_prepare_for_gro(struct sk_buff *skb)
2409 {
2410 if (skb->destructor != sock_wfree) {
2411 skb_orphan(skb);
2412 return;
2413 }
2414 skb->slow_gro = 1;
2415 }
2416
2417 void sk_reset_timer(struct sock *sk, struct timer_list *timer,
2418 unsigned long expires);
2419
2420 void sk_stop_timer(struct sock *sk, struct timer_list *timer);
2421
2422 void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer);
2423
2424 int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
2425 struct sk_buff *skb, unsigned int flags,
2426 void (*destructor)(struct sock *sk,
2427 struct sk_buff *skb));
2428 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
2429
2430 int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
2431 enum skb_drop_reason *reason);
2432
2433 static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
2434 {
2435 return sock_queue_rcv_skb_reason(sk, skb, NULL);
2436 }
2437
2438 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
2439 struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
2440
2441
2442
2443
2444
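/* sock_error - return and clear the pending socket error
 *
 * Returns the negated sk_err value (0 if none) and resets sk_err so the
 * error is reported only once.  sk_err is read without the socket lock;
 * the data_race() annotation marks that benign race on the fast path.
 */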
2445 static inline int sock_error(struct sock *sk)
2446 {
2447 int err;
2448
2449
2450
2451
2452 if (likely(data_race(!sk->sk_err)))
2453 return 0;
2454
2455 err = xchg(&sk->sk_err, 0);
2456 return -err;
2457 }
2458
2459 void sk_error_report(struct sock *sk);
2460
2461 static inline unsigned long sock_wspace(struct sock *sk)
2462 {
2463 int amt = 0;
2464
2465 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
2466 amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc);
2467 if (amt < 0)
2468 amt = 0;
2469 }
2470 return amt;
2471 }
2472
2473
2474
2475
2476
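/* sk_set_bit / sk_clear_bit - update flags in the socket wait queue
 *
 * The SOCKWQ_ASYNC_NOSPACE and SOCKWQ_ASYNC_WAITDATA bits are only
 * consumed when SIGIO/fasync is in use, so the atomic bit operation is
 * skipped entirely for sockets without SOCK_FASYNC set.
 */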
2477 static inline void sk_set_bit(int nr, struct sock *sk)
2478 {
2479 if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) &&
2480 !sock_flag(sk, SOCK_FASYNC))
2481 return;
2482
2483 set_bit(nr, &sk->sk_wq_raw->flags);
2484 }
2485
2486 static inline void sk_clear_bit(int nr, struct sock *sk)
2487 {
2488 if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) &&
2489 !sock_flag(sk, SOCK_FASYNC))
2490 return;
2491
2492 clear_bit(nr, &sk->sk_wq_raw->flags);
2493 }
2494
2495 static inline void sk_wake_async(const struct sock *sk, int how, int band)
2496 {
2497 if (sock_flag(sk, SOCK_FASYNC)) {
2498 rcu_read_lock();
2499 sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
2500 rcu_read_unlock();
2501 }
2502 }
2503
2504
2505
2506
2507
2508
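/* Minimum buffer limits: the floors below are expressed in terms of the
 * true size of a minimally sized TCP skb (2 KB of data allocation plus
 * the aligned struct sk_buff overhead).
 */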
2509 #define TCP_SKB_MIN_TRUESIZE (2048 + SKB_DATA_ALIGN(sizeof(struct sk_buff)))
2510
2511 #define SOCK_MIN_SNDBUF (TCP_SKB_MIN_TRUESIZE * 2)
2512 #define SOCK_MIN_RCVBUF TCP_SKB_MIN_TRUESIZE
2513
2514 static inline void sk_stream_moderate_sndbuf(struct sock *sk)
2515 {
2516 u32 val;
2517
2518 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
2519 return;
2520
2521 val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
2522 val = max_t(u32, val, sk_unused_reserved_mem(sk));
2523
2524 WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF));
2525 }
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
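/* sk_page_frag - return the page_frag to use for this socket
 *
 * When sk_allocation indicates plain process context (direct reclaim and
 * GFP_FS allowed, not a memalloc socket), the per-task page fragment can
 * be used; otherwise fall back to the per-socket fragment, since reclaim
 * or memalloc paths could otherwise recurse into a task fragment that is
 * already in use.
 */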
2544 static inline struct page_frag *sk_page_frag(struct sock *sk)
2545 {
2546 if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) ==
2547 (__GFP_DIRECT_RECLAIM | __GFP_FS))
2548 return &current->task_frag;
2549
2550 return &sk->sk_frag;
2551 }
2552
2553 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
2554
2555
2556
2557
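/* Default write-space policy as reported via poll/select/SIGIO: writeable
 * while less than half of sk_sndbuf is consumed by queued transmit data.
 */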
2558 static inline bool sock_writeable(const struct sock *sk)
2559 {
2560 return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1);
2561 }
2562
2563 static inline gfp_t gfp_any(void)
2564 {
2565 return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
2566 }
2567
2568 static inline gfp_t gfp_memcg_charge(void)
2569 {
2570 return in_softirq() ? GFP_NOWAIT : GFP_KERNEL;
2571 }
2572
2573 static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
2574 {
2575 return noblock ? 0 : sk->sk_rcvtimeo;
2576 }
2577
2578 static inline long sock_sndtimeo(const struct sock *sk, bool noblock)
2579 {
2580 return noblock ? 0 : sk->sk_sndtimeo;
2581 }
2582
2583 static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len)
2584 {
2585 int v = waitall ? len : min_t(int, READ_ONCE(sk->sk_rcvlowat), len);
2586
2587 return v ?: 1;
2588 }
2589
2590
2591
2592
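/* Socket operations with a finite timeout are not restartable after a
 * signal, so they return -EINTR; only the infinite-timeout case can use
 * -ERESTARTSYS.
 */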
2593 static inline int sock_intr_errno(long timeo)
2594 {
2595 return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
2596 }
2597
2598 struct sock_skb_cb {
2599 u32 dropcount;
2600 };
2601
2602
2603
2604
2605
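/* sock_skb_cb lives at the tail of skb->cb[] so protocol code, which uses
 * the front of the control buffer, does not overlap it;
 * sock_skb_cb_check_size() enforces the split at build time.
 */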
2606 #define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \
2607 sizeof(struct sock_skb_cb)))
2608
2609 #define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
2610 SOCK_SKB_CB_OFFSET))
2611
2612 #define sock_skb_cb_check_size(size) \
2613 BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
2614
2615 static inline void
2616 sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
2617 {
2618 SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ?
2619 atomic_read(&sk->sk_drops) : 0;
2620 }
2621
2622 static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
2623 {
2624 int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
2625
2626 atomic_add(segs, &sk->sk_drops);
2627 }
2628
2629 static inline ktime_t sock_read_timestamp(struct sock *sk)
2630 {
2631 #if BITS_PER_LONG==32
2632 unsigned int seq;
2633 ktime_t kt;
2634
2635 do {
2636 seq = read_seqbegin(&sk->sk_stamp_seq);
2637 kt = sk->sk_stamp;
2638 } while (read_seqretry(&sk->sk_stamp_seq, seq));
2639
2640 return kt;
2641 #else
2642 return READ_ONCE(sk->sk_stamp);
2643 #endif
2644 }
2645
2646 static inline void sock_write_timestamp(struct sock *sk, ktime_t kt)
2647 {
2648 #if BITS_PER_LONG==32
2649 write_seqlock(&sk->sk_stamp_seq);
2650 sk->sk_stamp = kt;
2651 write_sequnlock(&sk->sk_stamp_seq);
2652 #else
2653 WRITE_ONCE(sk->sk_stamp, kt);
2654 #endif
2655 }
2656
2657 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
2658 struct sk_buff *skb);
2659 void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
2660 struct sk_buff *skb);
2661
2662 static inline void
2663 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
2664 {
2665 ktime_t kt = skb->tstamp;
2666 struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
2667
2668
2669
2670
2671
2672
2673
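/* Emit a timestamp cmsg if receive timestamping was requested, or if a
 * software or hardware timestamp is present and the matching
 * SOF_TIMESTAMPING_* flag is set; otherwise just record the timestamp on
 * the socket for later retrieval.
 */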
2674 if (sock_flag(sk, SOCK_RCVTSTAMP) ||
2675 (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
2676 (kt && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
2677 (hwtstamps->hwtstamp &&
2678 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
2679 __sock_recv_timestamp(msg, sk, skb);
2680 else
2681 sock_write_timestamp(sk, kt);
2682
2683 if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
2684 __sock_recv_wifi_status(msg, sk, skb);
2685 }
2686
2687 void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
2688 struct sk_buff *skb);
2689
2690 #define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC)
2691 static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
2692 struct sk_buff *skb)
2693 {
2694 #define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \
2695 (1UL << SOCK_RCVTSTAMP) | \
2696 (1UL << SOCK_RCVMARK))
2697 #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
2698 SOF_TIMESTAMPING_RAW_HARDWARE)
2699
2700 if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY)
2701 __sock_recv_cmsgs(msg, sk, skb);
2702 else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
2703 sock_write_timestamp(sk, skb->tstamp);
2704 else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
2705 sock_write_timestamp(sk, 0);
2706 }
2707
2708 void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags);
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
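/* _sock_tx_timestamp - prepare transmit timestamping for an skb
 *
 * Translates @tsflags into shared-info tx_flags and, when
 * SOF_TIMESTAMPING_OPT_ID is active for a timestamped transmit, hands out
 * the next per-socket timestamp key via @tskey.  Callers are expected to
 * have initialised *tx_flags beforehand.
 */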
2719 static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags,
2720 __u8 *tx_flags, __u32 *tskey)
2721 {
2722 if (unlikely(tsflags)) {
2723 __sock_tx_timestamp(tsflags, tx_flags);
2724 if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey &&
2725 tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
2726 *tskey = atomic_inc_return(&sk->sk_tskey) - 1;
2727 }
2728 if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
2729 *tx_flags |= SKBTX_WIFI_STATUS;
2730 }
2731
2732 static inline void sock_tx_timestamp(struct sock *sk, __u16 tsflags,
2733 __u8 *tx_flags)
2734 {
2735 _sock_tx_timestamp(sk, tsflags, tx_flags, NULL);
2736 }
2737
2738 static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
2739 {
2740 _sock_tx_timestamp(skb->sk, tsflags, &skb_shinfo(skb)->tx_flags,
2741 &skb_shinfo(skb)->tskey);
2742 }
2743
2744 static inline bool sk_is_tcp(const struct sock *sk)
2745 {
2746 return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP;
2747 }
2748
2749
2750
2751
2752
2753
2754
2755
2756
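/* sk_eat_skb - unlink an skb from the receive queue and free it
 *
 * Uses the unlocked list primitive, so the caller must already hold the
 * protection required for sk_receive_queue.
 */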
2757 static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
2758 {
2759 __skb_unlink(skb, &sk->sk_receive_queue);
2760 __kfree_skb(skb);
2761 }
2762
2763 static inline bool
2764 skb_sk_is_prefetched(struct sk_buff *skb)
2765 {
2766 #ifdef CONFIG_INET
2767 return skb->destructor == sock_pfree;
2768 #else
2769 return false;
2770 #endif
2771 }
2772
2773
2774
2775
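/* Returns true for a full socket, i.e. not a timewait or request
 * (NEW_SYN_RECV) mini-socket.
 */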
2776 static inline bool sk_fullsock(const struct sock *sk)
2777 {
2778 return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
2779 }
2780
2781 static inline bool
2782 sk_is_refcounted(struct sock *sk)
2783 {
2784
2785 return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE);
2786 }
2787
2788
2789
2790
2791
2792
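/* skb_steal_sock - take over the socket attached to an skb
 *
 * Detaches skb->sk (clearing the destructor) and returns it; *refcounted
 * tells the caller whether it now owns a reference that must be released.
 */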
2793 static inline struct sock *
2794 skb_steal_sock(struct sk_buff *skb, bool *refcounted)
2795 {
2796 if (skb->sk) {
2797 struct sock *sk = skb->sk;
2798
2799 *refcounted = true;
2800 if (skb_sk_is_prefetched(skb))
2801 *refcounted = sk_is_refcounted(sk);
2802 skb->destructor = NULL;
2803 skb->sk = NULL;
2804 return sk;
2805 }
2806 *refcounted = false;
2807 return NULL;
2808 }
2809
2810
2811
2812
2813
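/* sk_validate_xmit_skb - let the owning socket validate an outgoing skb
 *
 * Sockets providing sk_validate_xmit_skb (e.g. kTLS device offload) may
 * transform or reject the skb for this device.  An skb still marked
 * decrypted but with no owning socket is dropped rather than sent in the
 * clear.
 */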
2814 static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
2815 struct net_device *dev)
2816 {
2817 #ifdef CONFIG_SOCK_VALIDATE_XMIT
2818 struct sock *sk = skb->sk;
2819
2820 if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) {
2821 skb = sk->sk_validate_xmit_skb(sk, dev, skb);
2822 #ifdef CONFIG_TLS_DEVICE
2823 } else if (unlikely(skb->decrypted)) {
2824 pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n");
2825 kfree_skb(skb);
2826 skb = NULL;
2827 #endif
2828 }
2829 #endif
2830
2831 return skb;
2832 }
2833
2834
2835
2836
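/* Returns true for sockets in the LISTEN or NEW_SYN_RECV state. */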
2837 static inline bool sk_listener(const struct sock *sk)
2838 {
2839 return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
2840 }
2841
2842 void sock_enable_timestamp(struct sock *sk, enum sock_flags flag);
2843 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
2844 int type);
2845
2846 bool sk_ns_capable(const struct sock *sk,
2847 struct user_namespace *user_ns, int cap);
2848 bool sk_capable(const struct sock *sk, int cap);
2849 bool sk_net_capable(const struct sock *sk, int cap);
2850
2851 void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
2852
2853
2854
2855
2856
2857
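/* Default buffer limits, expressed as a packet count multiplied by the
 * true size of a 256-byte skb so that the platform-dependent struct
 * sk_buff overhead is accounted for consistently.
 */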
2858 #define _SK_MEM_PACKETS 256
2859 #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
2860 #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
2861 #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
2862
2863 extern __u32 sysctl_wmem_max;
2864 extern __u32 sysctl_rmem_max;
2865
2866 extern int sysctl_tstamp_allow_data;
2867 extern int sysctl_optmem_max;
2868
2869 extern __u32 sysctl_wmem_default;
2870 extern __u32 sysctl_rmem_default;
2871
2872 #define SKB_FRAG_PAGE_ORDER get_order(32768)
2873 DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
2874
2875 static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
2876 {
2877
2878 if (proto->sysctl_wmem_offset)
2879 return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
2880
2881 return READ_ONCE(*proto->sysctl_wmem);
2882 }
2883
2884 static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
2885 {
2886
2887 if (proto->sysctl_rmem_offset)
2888 return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
2889
2890 return READ_ONCE(*proto->sysctl_rmem);
2891 }
2892
2893
2894
2895
2896
2897 static inline void sk_pacing_shift_update(struct sock *sk, int val)
2898 {
2899 if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val)
2900 return;
2901 WRITE_ONCE(sk->sk_pacing_shift, val);
2902 }
2903
2904
2905
2906
2907
2908
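/* Returns true if the socket is unbound, bound directly to interface @dif,
 * or bound to the L3 master device (e.g. a VRF) that @dif is enslaved to.
 */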
2909 static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
2910 {
2911 int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
2912 int mdif;
2913
2914 if (!bound_dev_if || bound_dev_if == dif)
2915 return true;
2916
2917 mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif);
2918 if (mdif && mdif == bound_dev_if)
2919 return true;
2920
2921 return false;
2922 }
2923
2924 void sock_def_readable(struct sock *sk);
2925
2926 int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk);
2927 void sock_set_timestamp(struct sock *sk, int optname, bool valbool);
2928 int sock_set_timestamping(struct sock *sk, int optname,
2929 struct so_timestamping timestamping);
2930
2931 void sock_enable_timestamps(struct sock *sk);
2932 void sock_no_linger(struct sock *sk);
2933 void sock_set_keepalive(struct sock *sk);
2934 void sock_set_priority(struct sock *sk, u32 priority);
2935 void sock_set_rcvbuf(struct sock *sk, int val);
2936 void sock_set_mark(struct sock *sk, u32 val);
2937 void sock_set_reuseaddr(struct sock *sk);
2938 void sock_set_reuseport(struct sock *sk);
2939 void sock_set_sndtimeo(struct sock *sk, s64 secs);
2940
2941 int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len);
2942
2943 int sock_get_timeout(long timeo, void *optval, bool old_timeval);
2944 int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
2945 sockptr_t optval, int optlen, bool old_timeval);
2946
2947 static inline bool sk_is_readable(struct sock *sk)
2948 {
2949 if (sk->sk_prot->sock_is_readable)
2950 return sk->sk_prot->sock_is_readable(sk);
2951 return false;
2952 }
2953 #endif