// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The User Datagram Protocol (UDP).
 */

0075 #define pr_fmt(fmt) "UDP: " fmt
0076
0077 #include <linux/bpf-cgroup.h>
0078 #include <linux/uaccess.h>
0079 #include <asm/ioctls.h>
0080 #include <linux/memblock.h>
0081 #include <linux/highmem.h>
0082 #include <linux/types.h>
0083 #include <linux/fcntl.h>
0084 #include <linux/module.h>
0085 #include <linux/socket.h>
0086 #include <linux/sockios.h>
0087 #include <linux/igmp.h>
0088 #include <linux/inetdevice.h>
0089 #include <linux/in.h>
0090 #include <linux/errno.h>
0091 #include <linux/timer.h>
0092 #include <linux/mm.h>
0093 #include <linux/inet.h>
0094 #include <linux/netdevice.h>
0095 #include <linux/slab.h>
0096 #include <net/tcp_states.h>
0097 #include <linux/skbuff.h>
0098 #include <linux/proc_fs.h>
0099 #include <linux/seq_file.h>
0100 #include <net/net_namespace.h>
0101 #include <net/icmp.h>
0102 #include <net/inet_hashtables.h>
0103 #include <net/ip_tunnels.h>
0104 #include <net/route.h>
0105 #include <net/checksum.h>
0106 #include <net/xfrm.h>
0107 #include <trace/events/udp.h>
0108 #include <linux/static_key.h>
0109 #include <linux/btf_ids.h>
0110 #include <trace/events/skb.h>
0111 #include <net/busy_poll.h>
0112 #include "udp_impl.h"
0113 #include <net/sock_reuseport.h>
0114 #include <net/addrconf.h>
0115 #include <net/udp_tunnel.h>
0116 #if IS_ENABLED(CONFIG_IPV6)
0117 #include <net/ipv6_stubs.h>
0118 #endif
0119
0120 struct udp_table udp_table __read_mostly;
0121 EXPORT_SYMBOL(udp_table);
0122
0123 long sysctl_udp_mem[3] __read_mostly;
0124 EXPORT_SYMBOL(sysctl_udp_mem);
0125
0126 atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp;
0127 EXPORT_SYMBOL(udp_memory_allocated);
0128 DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc);
0129 EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc);
0130
0131 #define MAX_UDP_PORTS 65536
0132 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
0133
0134 static int udp_lib_lport_inuse(struct net *net, __u16 num,
0135 const struct udp_hslot *hslot,
0136 unsigned long *bitmap,
0137 struct sock *sk, unsigned int log)
0138 {
0139 struct sock *sk2;
0140 kuid_t uid = sock_i_uid(sk);
0141
0142 sk_for_each(sk2, &hslot->head) {
0143 if (net_eq(sock_net(sk2), net) &&
0144 sk2 != sk &&
0145 (bitmap || udp_sk(sk2)->udp_port_hash == num) &&
0146 (!sk2->sk_reuse || !sk->sk_reuse) &&
0147 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
0148 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
0149 inet_rcv_saddr_equal(sk, sk2, true)) {
0150 if (sk2->sk_reuseport && sk->sk_reuseport &&
0151 !rcu_access_pointer(sk->sk_reuseport_cb) &&
0152 uid_eq(uid, sock_i_uid(sk2))) {
0153 if (!bitmap)
0154 return 0;
0155 } else {
0156 if (!bitmap)
0157 return 1;
0158 __set_bit(udp_sk(sk2)->udp_port_hash >> log,
0159 bitmap);
0160 }
0161 }
0162 }
0163 return 0;
0164 }

/*
 * Secondary hash variant of udp_lib_lport_inuse(): walk the {port, address}
 * chain in hslot2 and report whether binding @sk to local port @num would
 * conflict with an already bound socket.
 */
0170 static int udp_lib_lport_inuse2(struct net *net, __u16 num,
0171 struct udp_hslot *hslot2,
0172 struct sock *sk)
0173 {
0174 struct sock *sk2;
0175 kuid_t uid = sock_i_uid(sk);
0176 int res = 0;
0177
0178 spin_lock(&hslot2->lock);
0179 udp_portaddr_for_each_entry(sk2, &hslot2->head) {
0180 if (net_eq(sock_net(sk2), net) &&
0181 sk2 != sk &&
0182 (udp_sk(sk2)->udp_port_hash == num) &&
0183 (!sk2->sk_reuse || !sk->sk_reuse) &&
0184 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
0185 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
0186 inet_rcv_saddr_equal(sk, sk2, true)) {
0187 if (sk2->sk_reuseport && sk->sk_reuseport &&
0188 !rcu_access_pointer(sk->sk_reuseport_cb) &&
0189 uid_eq(uid, sock_i_uid(sk2))) {
0190 res = 0;
0191 } else {
0192 res = 1;
0193 }
0194 break;
0195 }
0196 }
0197 spin_unlock(&hslot2->lock);
0198 return res;
0199 }
0200
0201 static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
0202 {
0203 struct net *net = sock_net(sk);
0204 kuid_t uid = sock_i_uid(sk);
0205 struct sock *sk2;
0206
0207 sk_for_each(sk2, &hslot->head) {
0208 if (net_eq(sock_net(sk2), net) &&
0209 sk2 != sk &&
0210 sk2->sk_family == sk->sk_family &&
0211 ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
0212 (udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
0213 (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
0214 sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
0215 inet_rcv_saddr_equal(sk, sk2, false)) {
0216 return reuseport_add_sock(sk, sk2,
0217 inet_rcv_saddr_any(sk));
0218 }
0219 }
0220
0221 return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
0222 }

/**
 *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
 *
 *  @sk:          socket struct in question
 *  @snum:        port number to look up
 *  @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
 *                   with NULL address
 */
0232 int udp_lib_get_port(struct sock *sk, unsigned short snum,
0233 unsigned int hash2_nulladdr)
0234 {
0235 struct udp_hslot *hslot, *hslot2;
0236 struct udp_table *udptable = sk->sk_prot->h.udp_table;
0237 int error = 1;
0238 struct net *net = sock_net(sk);
0239
0240 if (!snum) {
0241 int low, high, remaining;
0242 unsigned int rand;
0243 unsigned short first, last;
0244 DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
0245
0246 inet_get_local_port_range(net, &low, &high);
0247 remaining = (high - low) + 1;
0248
0249 rand = prandom_u32();
0250 first = reciprocal_scale(rand, remaining) + low;
/*
 * force rand to be an odd multiple of UDP_HTABLE_SIZE
 */
0254 rand = (rand | 1) * (udptable->mask + 1);
0255 last = first + udptable->mask + 1;
0256 do {
0257 hslot = udp_hashslot(udptable, net, first);
0258 bitmap_zero(bitmap, PORTS_PER_CHAIN);
0259 spin_lock_bh(&hslot->lock);
0260 udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
0261 udptable->log);
0262
0263 snum = first;
/* Iterate on all possible values of snum for this hash.
 * Using steps of an odd multiple of UDP_HTABLE_SIZE
 * gives us randomization and full range coverage.
 */
0269 do {
0270 if (low <= snum && snum <= high &&
0271 !test_bit(snum >> udptable->log, bitmap) &&
0272 !inet_is_local_reserved_port(net, snum))
0273 goto found;
0274 snum += rand;
0275 } while (snum != first);
0276 spin_unlock_bh(&hslot->lock);
0277 cond_resched();
0278 } while (++first != last);
0279 goto fail;
0280 } else {
0281 hslot = udp_hashslot(udptable, net, snum);
0282 spin_lock_bh(&hslot->lock);
0283 if (hslot->count > 10) {
0284 int exist;
0285 unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;
0286
0287 slot2 &= udptable->mask;
0288 hash2_nulladdr &= udptable->mask;
0289
0290 hslot2 = udp_hashslot2(udptable, slot2);
0291 if (hslot->count < hslot2->count)
0292 goto scan_primary_hash;
0293
0294 exist = udp_lib_lport_inuse2(net, snum, hslot2, sk);
0295 if (!exist && (hash2_nulladdr != slot2)) {
0296 hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
0297 exist = udp_lib_lport_inuse2(net, snum, hslot2,
0298 sk);
0299 }
0300 if (exist)
0301 goto fail_unlock;
0302 else
0303 goto found;
0304 }
0305 scan_primary_hash:
0306 if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 0))
0307 goto fail_unlock;
0308 }
0309 found:
0310 inet_sk(sk)->inet_num = snum;
0311 udp_sk(sk)->udp_port_hash = snum;
0312 udp_sk(sk)->udp_portaddr_hash ^= snum;
0313 if (sk_unhashed(sk)) {
0314 if (sk->sk_reuseport &&
0315 udp_reuseport_add_sock(sk, hslot)) {
0316 inet_sk(sk)->inet_num = 0;
0317 udp_sk(sk)->udp_port_hash = 0;
0318 udp_sk(sk)->udp_portaddr_hash ^= snum;
0319 goto fail_unlock;
0320 }
0321
0322 sk_add_node_rcu(sk, &hslot->head);
0323 hslot->count++;
0324 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
0325
0326 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
0327 spin_lock(&hslot2->lock);
0328 if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
0329 sk->sk_family == AF_INET6)
0330 hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node,
0331 &hslot2->head);
0332 else
0333 hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
0334 &hslot2->head);
0335 hslot2->count++;
0336 spin_unlock(&hslot2->lock);
0337 }
0338 sock_set_flag(sk, SOCK_RCU_FREE);
0339 error = 0;
0340 fail_unlock:
0341 spin_unlock_bh(&hslot->lock);
0342 fail:
0343 return error;
0344 }
0345 EXPORT_SYMBOL(udp_lib_get_port);
0346
0347 int udp_v4_get_port(struct sock *sk, unsigned short snum)
0348 {
0349 unsigned int hash2_nulladdr =
0350 ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
0351 unsigned int hash2_partial =
0352 ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);

/* precompute partial secondary hash */
0355 udp_sk(sk)->udp_portaddr_hash = hash2_partial;
0356 return udp_lib_get_port(sk, snum, hash2_nulladdr);
0357 }
0358
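/* Score a candidate socket for an incoming packet: -1 means no match,
 * otherwise a higher score is returned for more specific matches
 * (bound local address, connected remote address/port, bound device,
 * plus a small bonus when the packet arrives on the socket's CPU).
 */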
0359 static int compute_score(struct sock *sk, struct net *net,
0360 __be32 saddr, __be16 sport,
0361 __be32 daddr, unsigned short hnum,
0362 int dif, int sdif)
0363 {
0364 int score;
0365 struct inet_sock *inet;
0366 bool dev_match;
0367
0368 if (!net_eq(sock_net(sk), net) ||
0369 udp_sk(sk)->udp_port_hash != hnum ||
0370 ipv6_only_sock(sk))
0371 return -1;
0372
0373 if (sk->sk_rcv_saddr != daddr)
0374 return -1;
0375
0376 score = (sk->sk_family == PF_INET) ? 2 : 1;
0377
0378 inet = inet_sk(sk);
0379 if (inet->inet_daddr) {
0380 if (inet->inet_daddr != saddr)
0381 return -1;
0382 score += 4;
0383 }
0384
0385 if (inet->inet_dport) {
0386 if (inet->inet_dport != sport)
0387 return -1;
0388 score += 4;
0389 }
0390
0391 dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
0392 dif, sdif);
0393 if (!dev_match)
0394 return -1;
0395 if (sk->sk_bound_dev_if)
0396 score += 4;
0397
0398 if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
0399 score++;
0400 return score;
0401 }
0402
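/* Flow hash over {laddr, lport, faddr, fport}, used to pick a socket
 * out of a SO_REUSEPORT group.
 */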
0403 static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
0404 const __u16 lport, const __be32 faddr,
0405 const __be16 fport)
0406 {
0407 static u32 udp_ehash_secret __read_mostly;
0408
0409 net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret));
0410
0411 return __inet_ehashfn(laddr, lport, faddr, fport,
0412 udp_ehash_secret + net_hash_mix(net));
0413 }
0414
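/* If @sk belongs to a reuseport group, select the receiving socket from
 * the group based on the flow hash; returns NULL when no redirection
 * happened.
 */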
0415 static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
0416 struct sk_buff *skb,
0417 __be32 saddr, __be16 sport,
0418 __be32 daddr, unsigned short hnum)
0419 {
0420 struct sock *reuse_sk = NULL;
0421 u32 hash;
0422
0423 if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
0424 hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
0425 reuse_sk = reuseport_select_sock(sk, hash, skb,
0426 sizeof(struct udphdr));
0427 }
0428 return reuse_sk;
0429 }

/* called with rcu_read_lock() */
0432 static struct sock *udp4_lib_lookup2(struct net *net,
0433 __be32 saddr, __be16 sport,
0434 __be32 daddr, unsigned int hnum,
0435 int dif, int sdif,
0436 struct udp_hslot *hslot2,
0437 struct sk_buff *skb)
0438 {
0439 struct sock *sk, *result;
0440 int score, badness;
0441
0442 result = NULL;
0443 badness = 0;
0444 udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
0445 score = compute_score(sk, net, saddr, sport,
0446 daddr, hnum, dif, sdif);
0447 if (score > badness) {
0448 result = lookup_reuseport(net, sk, skb,
0449 saddr, sport, daddr, hnum);
0450
0451 if (result && !reuseport_has_conns(sk, false))
0452 return result;
0453
0454 result = result ? : sk;
0455 badness = score;
0456 }
0457 }
0458 return result;
0459 }
0460
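/* Let a BPF sk_lookup program pick the receiving socket; only the global
 * udp_table is subject to BPF redirection.
 */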
0461 static struct sock *udp4_lookup_run_bpf(struct net *net,
0462 struct udp_table *udptable,
0463 struct sk_buff *skb,
0464 __be32 saddr, __be16 sport,
0465 __be32 daddr, u16 hnum, const int dif)
0466 {
0467 struct sock *sk, *reuse_sk;
0468 bool no_reuseport;
0469
0470 if (udptable != &udp_table)
0471 return NULL;
0472
0473 no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport,
0474 daddr, hnum, dif, &sk);
0475 if (no_reuseport || IS_ERR_OR_NULL(sk))
0476 return sk;
0477
0478 reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
0479 if (reuse_sk)
0480 sk = reuse_sk;
0481 return sk;
0482 }

/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
 * harder than this. -DaveM
 */
0487 struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
0488 __be16 sport, __be32 daddr, __be16 dport, int dif,
0489 int sdif, struct udp_table *udptable, struct sk_buff *skb)
0490 {
0491 unsigned short hnum = ntohs(dport);
0492 unsigned int hash2, slot2;
0493 struct udp_hslot *hslot2;
0494 struct sock *result, *sk;
0495
0496 hash2 = ipv4_portaddr_hash(net, daddr, hnum);
0497 slot2 = hash2 & udptable->mask;
0498 hslot2 = &udptable->hash2[slot2];

/* Lookup connected or non-wildcard socket */
0501 result = udp4_lib_lookup2(net, saddr, sport,
0502 daddr, hnum, dif, sdif,
0503 hslot2, skb);
0504 if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
0505 goto done;

/* Lookup redirect from BPF */
0508 if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
0509 sk = udp4_lookup_run_bpf(net, udptable, skb,
0510 saddr, sport, daddr, hnum, dif);
0511 if (sk) {
0512 result = sk;
0513 goto done;
0514 }
0515 }

/* Got non-wildcard socket or error on first lookup */
0518 if (result)
0519 goto done;

/* Lookup wildcard sockets */
0522 hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
0523 slot2 = hash2 & udptable->mask;
0524 hslot2 = &udptable->hash2[slot2];
0525
0526 result = udp4_lib_lookup2(net, saddr, sport,
0527 htonl(INADDR_ANY), hnum, dif, sdif,
0528 hslot2, skb);
0529 done:
0530 if (IS_ERR(result))
0531 return NULL;
0532 return result;
0533 }
0534 EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
0535
0536 static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
0537 __be16 sport, __be16 dport,
0538 struct udp_table *udptable)
0539 {
0540 const struct iphdr *iph = ip_hdr(skb);
0541
0542 return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
0543 iph->daddr, dport, inet_iif(skb),
0544 inet_sdif(skb), udptable, skb);
0545 }
0546
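/* Lookup keyed by the addresses/ports in @skb's IPv4 header.  No reference
 * is taken on the result, so it is only valid under rcu_read_lock().
 */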
0547 struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
0548 __be16 sport, __be16 dport)
0549 {
0550 const struct iphdr *iph = ip_hdr(skb);
0551
0552 return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
0553 iph->daddr, dport, inet_iif(skb),
0554 inet_sdif(skb), &udp_table, NULL);
0555 }

/* Must be called under rcu_read_lock().
 * Does increment socket refcount.
 */
0560 #if IS_ENABLED(CONFIG_NF_TPROXY_IPV4) || IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
0561 struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
0562 __be32 daddr, __be16 dport, int dif)
0563 {
0564 struct sock *sk;
0565
0566 sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
0567 dif, 0, &udp_table, NULL);
0568 if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
0569 sk = NULL;
0570 return sk;
0571 }
0572 EXPORT_SYMBOL_GPL(udp4_lib_lookup);
0573 #endif
0574
0575 static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
0576 __be16 loc_port, __be32 loc_addr,
0577 __be16 rmt_port, __be32 rmt_addr,
0578 int dif, int sdif, unsigned short hnum)
0579 {
0580 struct inet_sock *inet = inet_sk(sk);
0581
0582 if (!net_eq(sock_net(sk), net) ||
0583 udp_sk(sk)->udp_port_hash != hnum ||
0584 (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
0585 (inet->inet_dport != rmt_port && inet->inet_dport) ||
0586 (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
0587 ipv6_only_sock(sk) ||
0588 !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
0589 return false;
0590 if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
0591 return false;
0592 return true;
0593 }
0594
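/* Static key gating the encapsulation receive path; enabled while at least
 * one socket uses UDP encapsulation.
 */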
0595 DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
0596 void udp_encap_enable(void)
0597 {
0598 static_branch_inc(&udp_encap_needed_key);
0599 }
0600 EXPORT_SYMBOL(udp_encap_enable);
0601
0602 void udp_encap_disable(void)
0603 {
0604 static_branch_dec(&udp_encap_needed_key);
0605 }
0606 EXPORT_SYMBOL(udp_encap_disable);

/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
 * through error handlers in encapsulations looking for a match.
 */
0611 static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info)
0612 {
0613 int i;
0614
0615 for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
0616 int (*handler)(struct sk_buff *skb, u32 info);
0617 const struct ip_tunnel_encap_ops *encap;
0618
0619 encap = rcu_dereference(iptun_encaps[i]);
0620 if (!encap)
0621 continue;
0622 handler = encap->err_handler;
0623 if (handler && !handler(skb, info))
0624 return 0;
0625 }
0626
0627 return -ENOENT;
0628 }

/* Try to match an ICMP error to a UDP tunnel socket.  The lookup is done
 * with non-reversed source/destination addresses and ports, and the result
 * is only accepted if the encapsulation's ->encap_err_lookup() handler
 * confirms the error really belongs to that tunnel.  When no tunnel socket
 * matches, fall back to the generic per-encapsulation error handlers
 * (__udp4_lib_err_encap_no_sk() above).
 */
0647 static struct sock *__udp4_lib_err_encap(struct net *net,
0648 const struct iphdr *iph,
0649 struct udphdr *uh,
0650 struct udp_table *udptable,
0651 struct sock *sk,
0652 struct sk_buff *skb, u32 info)
0653 {
0654 int (*lookup)(struct sock *sk, struct sk_buff *skb);
0655 int network_offset, transport_offset;
0656 struct udp_sock *up;
0657
0658 network_offset = skb_network_offset(skb);
0659 transport_offset = skb_transport_offset(skb);
0660
/* Network header needs to point to the outer IPv4 header inside ICMP */
0662 skb_reset_network_header(skb);
0663
/* Transport header needs to point to the UDP header */
0665 skb_set_transport_header(skb, iph->ihl << 2);
0666
0667 if (sk) {
0668 up = udp_sk(sk);
0669
0670 lookup = READ_ONCE(up->encap_err_lookup);
0671 if (lookup && lookup(sk, skb))
0672 sk = NULL;
0673
0674 goto out;
0675 }
0676
0677 sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
0678 iph->saddr, uh->dest, skb->dev->ifindex, 0,
0679 udptable, NULL);
0680 if (sk) {
0681 up = udp_sk(sk);
0682
0683 lookup = READ_ONCE(up->encap_err_lookup);
0684 if (!lookup || lookup(sk, skb))
0685 sk = NULL;
0686 }
0687
0688 out:
0689 if (!sk)
0690 sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
0691
0692 skb_set_transport_header(skb, transport_offset);
0693 skb_set_network_header(skb, network_offset);
0694
0695 return sk;
0696 }

/*
 * This routine is called by the ICMP module when it gets some sort of error
 * condition.  If err < 0 then the socket should be closed and the error
 * returned to the user.  If err > 0 it's just the icmp type << 8 | icmp code.
 * Header points to the ip header of the error packet, which lets us find the
 * offending UDP header and the socket that sent the datagram.
 */
0709 int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
0710 {
0711 struct inet_sock *inet;
0712 const struct iphdr *iph = (const struct iphdr *)skb->data;
0713 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
0714 const int type = icmp_hdr(skb)->type;
0715 const int code = icmp_hdr(skb)->code;
0716 bool tunnel = false;
0717 struct sock *sk;
0718 int harderr;
0719 int err;
0720 struct net *net = dev_net(skb->dev);
0721
0722 sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
0723 iph->saddr, uh->source, skb->dev->ifindex,
0724 inet_sdif(skb), udptable, NULL);
0725
0726 if (!sk || udp_sk(sk)->encap_type) {
/* No socket for error: try tunnels before discarding */
0728 if (static_branch_unlikely(&udp_encap_needed_key)) {
0729 sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb,
0730 info);
0731 if (!sk)
0732 return 0;
0733 } else
0734 sk = ERR_PTR(-ENOENT);
0735
0736 if (IS_ERR(sk)) {
0737 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
0738 return PTR_ERR(sk);
0739 }
0740
0741 tunnel = true;
0742 }
0743
0744 err = 0;
0745 harderr = 0;
0746 inet = inet_sk(sk);
0747
0748 switch (type) {
0749 default:
0750 case ICMP_TIME_EXCEEDED:
0751 err = EHOSTUNREACH;
0752 break;
0753 case ICMP_SOURCE_QUENCH:
0754 goto out;
0755 case ICMP_PARAMETERPROB:
0756 err = EPROTO;
0757 harderr = 1;
0758 break;
0759 case ICMP_DEST_UNREACH:
0760 if (code == ICMP_FRAG_NEEDED) {
0761 ipv4_sk_update_pmtu(skb, sk, info);
0762 if (inet->pmtudisc != IP_PMTUDISC_DONT) {
0763 err = EMSGSIZE;
0764 harderr = 1;
0765 break;
0766 }
0767 goto out;
0768 }
0769 err = EHOSTUNREACH;
0770 if (code <= NR_ICMP_UNREACH) {
0771 harderr = icmp_err_convert[code].fatal;
0772 err = icmp_err_convert[code].errno;
0773 }
0774 break;
0775 case ICMP_REDIRECT:
0776 ipv4_sk_redirect(skb, sk);
0777 goto out;
0778 }

/*
 * RFC1122: OK.  Passes ICMP errors back to application, as per 4.1.3.3.
 */
0784 if (tunnel) {
0785
0786 if (udp_sk(sk)->encap_err_rcv)
0787 udp_sk(sk)->encap_err_rcv(sk, skb, iph->ihl << 2);
0788 goto out;
0789 }
0790 if (!inet->recverr) {
0791 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
0792 goto out;
0793 } else
0794 ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
0795
0796 sk->sk_err = err;
0797 sk_error_report(sk);
0798 out:
0799 return 0;
0800 }
0801
0802 int udp_err(struct sk_buff *skb, u32 info)
0803 {
0804 return __udp4_lib_err(skb, info, &udp_table);
0805 }

/*
 * Throw away all pending data and cancel the corking. Socket is locked.
 */
0810 void udp_flush_pending_frames(struct sock *sk)
0811 {
0812 struct udp_sock *up = udp_sk(sk);
0813
0814 if (up->pending) {
0815 up->len = 0;
0816 up->pending = 0;
0817 ip_flush_pending_frames(sk);
0818 }
0819 }
0820 EXPORT_SYMBOL(udp_flush_pending_frames);

/**
 * udp4_hwcsum  -  handle outgoing HW checksumming
 * @skb:	sk_buff containing the filled-in UDP header
 *		(checksum field must be zeroed out)
 * @src:	source IP address
 * @dst:	destination IP address
 */
0829 void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
0830 {
0831 struct udphdr *uh = udp_hdr(skb);
0832 int offset = skb_transport_offset(skb);
0833 int len = skb->len - offset;
0834 int hlen = len;
0835 __wsum csum = 0;
0836
0837 if (!skb_has_frag_list(skb)) {
/*
 * Only one fragment on the socket.
 */
0841 skb->csum_start = skb_transport_header(skb) - skb->head;
0842 skb->csum_offset = offsetof(struct udphdr, check);
0843 uh->check = ~csum_tcpudp_magic(src, dst, len,
0844 IPPROTO_UDP, 0);
0845 } else {
0846 struct sk_buff *frags;

/*
 * HW-checksum won't work as there are two or more
 * fragments on the socket so that all csums of sk_buffs
 * should be together
 */
0853 skb_walk_frags(skb, frags) {
0854 csum = csum_add(csum, frags->csum);
0855 hlen -= frags->len;
0856 }
0857
0858 csum = skb_checksum(skb, offset, hlen, csum);
0859 skb->ip_summed = CHECKSUM_NONE;
0860
0861 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
0862 if (uh->check == 0)
0863 uh->check = CSUM_MANGLED_0;
0864 }
0865 }
0866 EXPORT_SYMBOL_GPL(udp4_hwcsum);

/* Function to set UDP checksum for an IPv4 UDP packet. This is intended
 * for the simple case like when setting the checksum for a UDP tunnel.
 */
0871 void udp_set_csum(bool nocheck, struct sk_buff *skb,
0872 __be32 saddr, __be32 daddr, int len)
0873 {
0874 struct udphdr *uh = udp_hdr(skb);
0875
0876 if (nocheck) {
0877 uh->check = 0;
0878 } else if (skb_is_gso(skb)) {
0879 uh->check = ~udp_v4_check(len, saddr, daddr, 0);
0880 } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
0881 uh->check = 0;
0882 uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb));
0883 if (uh->check == 0)
0884 uh->check = CSUM_MANGLED_0;
0885 } else {
0886 skb->ip_summed = CHECKSUM_PARTIAL;
0887 skb->csum_start = skb_transport_header(skb) - skb->head;
0888 skb->csum_offset = offsetof(struct udphdr, check);
0889 uh->check = ~udp_v4_check(len, saddr, daddr, 0);
0890 }
0891 }
0892 EXPORT_SYMBOL(udp_set_csum);
0893
0894 static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
0895 struct inet_cork *cork)
0896 {
0897 struct sock *sk = skb->sk;
0898 struct inet_sock *inet = inet_sk(sk);
0899 struct udphdr *uh;
0900 int err;
0901 int is_udplite = IS_UDPLITE(sk);
0902 int offset = skb_transport_offset(skb);
0903 int len = skb->len - offset;
0904 int datalen = len - sizeof(*uh);
0905 __wsum csum = 0;
0906
/*
 * Create a UDP header
 */
0910 uh = udp_hdr(skb);
0911 uh->source = inet->inet_sport;
0912 uh->dest = fl4->fl4_dport;
0913 uh->len = htons(len);
0914 uh->check = 0;
0915
0916 if (cork->gso_size) {
0917 const int hlen = skb_network_header_len(skb) +
0918 sizeof(struct udphdr);
0919
0920 if (hlen + cork->gso_size > cork->fragsize) {
0921 kfree_skb(skb);
0922 return -EINVAL;
0923 }
0924 if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
0925 kfree_skb(skb);
0926 return -EINVAL;
0927 }
0928 if (sk->sk_no_check_tx) {
0929 kfree_skb(skb);
0930 return -EINVAL;
0931 }
0932 if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
0933 dst_xfrm(skb_dst(skb))) {
0934 kfree_skb(skb);
0935 return -EIO;
0936 }
0937
0938 if (datalen > cork->gso_size) {
0939 skb_shinfo(skb)->gso_size = cork->gso_size;
0940 skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
0941 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
0942 cork->gso_size);
0943 }
0944 goto csum_partial;
0945 }
0946
0947 if (is_udplite)
0948 csum = udplite_csum(skb);
0949
0950 else if (sk->sk_no_check_tx) {
0951
0952 skb->ip_summed = CHECKSUM_NONE;
0953 goto send;
0954
0955 } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
0956 csum_partial:
0957
0958 udp4_hwcsum(skb, fl4->saddr, fl4->daddr);
0959 goto send;
0960
0961 } else
0962 csum = udp_csum(skb);
0963
/* add protocol-dependent pseudo-header */
0965 uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len,
0966 sk->sk_protocol, csum);
0967 if (uh->check == 0)
0968 uh->check = CSUM_MANGLED_0;
0969
0970 send:
0971 err = ip_send_skb(sock_net(sk), skb);
0972 if (err) {
0973 if (err == -ENOBUFS && !inet->recverr) {
0974 UDP_INC_STATS(sock_net(sk),
0975 UDP_MIB_SNDBUFERRORS, is_udplite);
0976 err = 0;
0977 }
0978 } else
0979 UDP_INC_STATS(sock_net(sk),
0980 UDP_MIB_OUTDATAGRAMS, is_udplite);
0981 return err;
0982 }
0983
/*
 * Push out all pending data as one UDP datagram. Socket is locked.
 */
0987 int udp_push_pending_frames(struct sock *sk)
0988 {
0989 struct udp_sock *up = udp_sk(sk);
0990 struct inet_sock *inet = inet_sk(sk);
0991 struct flowi4 *fl4 = &inet->cork.fl.u.ip4;
0992 struct sk_buff *skb;
0993 int err = 0;
0994
0995 skb = ip_finish_skb(sk, fl4);
0996 if (!skb)
0997 goto out;
0998
0999 err = udp_send_skb(skb, fl4, &inet->cork.base);
1000
1001 out:
1002 up->len = 0;
1003 up->pending = 0;
1004 return err;
1005 }
1006 EXPORT_SYMBOL(udp_push_pending_frames);
1007
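/* Parse a single SOL_UDP cmsg; only UDP_SEGMENT (the GSO segment size)
 * is currently supported.
 */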
1008 static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size)
1009 {
1010 switch (cmsg->cmsg_type) {
1011 case UDP_SEGMENT:
1012 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u16)))
1013 return -EINVAL;
1014 *gso_size = *(__u16 *)CMSG_DATA(cmsg);
1015 return 0;
1016 default:
1017 return -EINVAL;
1018 }
1019 }
1020
1021 int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size)
1022 {
1023 struct cmsghdr *cmsg;
1024 bool need_ip = false;
1025 int err;
1026
1027 for_each_cmsghdr(cmsg, msg) {
1028 if (!CMSG_OK(msg, cmsg))
1029 return -EINVAL;
1030
1031 if (cmsg->cmsg_level != SOL_UDP) {
1032 need_ip = true;
1033 continue;
1034 }
1035
1036 err = __udp_cmsg_send(cmsg, gso_size);
1037 if (err)
1038 return err;
1039 }
1040
1041 return need_ip;
1042 }
1043 EXPORT_SYMBOL_GPL(udp_cmsg_send);
1044
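/* Build and send one UDP datagram (or append to a corked socket).  Handles
 * destination/cmsg parsing, routing, MSG_CONFIRM and the UDP_SEGMENT (GSO)
 * fast path.
 */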
1045 int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
1046 {
1047 struct inet_sock *inet = inet_sk(sk);
1048 struct udp_sock *up = udp_sk(sk);
1049 DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
1050 struct flowi4 fl4_stack;
1051 struct flowi4 *fl4;
1052 int ulen = len;
1053 struct ipcm_cookie ipc;
1054 struct rtable *rt = NULL;
1055 int free = 0;
1056 int connected = 0;
1057 __be32 daddr, faddr, saddr;
1058 __be16 dport;
1059 u8 tos;
1060 int err, is_udplite = IS_UDPLITE(sk);
1061 int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
1062 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
1063 struct sk_buff *skb;
1064 struct ip_options_data opt_copy;
1065
1066 if (len > 0xFFFF)
1067 return -EMSGSIZE;
1068
/*
 *	Check the flags.
 */

1073 if (msg->msg_flags & MSG_OOB)
1074 return -EOPNOTSUPP;
1075
1076 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
1077
1078 fl4 = &inet->cork.fl.u.ip4;
1079 if (up->pending) {
/*
 * There are pending frames.
 * The socket lock must be held while it's corked.
 */
1084 lock_sock(sk);
1085 if (likely(up->pending)) {
1086 if (unlikely(up->pending != AF_INET)) {
1087 release_sock(sk);
1088 return -EINVAL;
1089 }
1090 goto do_append_data;
1091 }
1092 release_sock(sk);
1093 }
1094 ulen += sizeof(struct udphdr);
1095
/*
 *	Get and verify the address.
 */
1099 if (usin) {
1100 if (msg->msg_namelen < sizeof(*usin))
1101 return -EINVAL;
1102 if (usin->sin_family != AF_INET) {
1103 if (usin->sin_family != AF_UNSPEC)
1104 return -EAFNOSUPPORT;
1105 }
1106
1107 daddr = usin->sin_addr.s_addr;
1108 dport = usin->sin_port;
1109 if (dport == 0)
1110 return -EINVAL;
1111 } else {
1112 if (sk->sk_state != TCP_ESTABLISHED)
1113 return -EDESTADDRREQ;
1114 daddr = inet->inet_daddr;
1115 dport = inet->inet_dport;
/* Open fast path for connected socket.
 * Route will not be used, if at least one option is set.
 */
1119 connected = 1;
1120 }
1121
1122 ipcm_init_sk(&ipc, inet);
1123 ipc.gso_size = READ_ONCE(up->gso_size);
1124
1125 if (msg->msg_controllen) {
1126 err = udp_cmsg_send(sk, msg, &ipc.gso_size);
1127 if (err > 0)
1128 err = ip_cmsg_send(sk, msg, &ipc,
1129 sk->sk_family == AF_INET6);
1130 if (unlikely(err < 0)) {
1131 kfree(ipc.opt);
1132 return err;
1133 }
1134 if (ipc.opt)
1135 free = 1;
1136 connected = 0;
1137 }
1138 if (!ipc.opt) {
1139 struct ip_options_rcu *inet_opt;
1140
1141 rcu_read_lock();
1142 inet_opt = rcu_dereference(inet->inet_opt);
1143 if (inet_opt) {
1144 memcpy(&opt_copy, inet_opt,
1145 sizeof(*inet_opt) + inet_opt->opt.optlen);
1146 ipc.opt = &opt_copy.opt;
1147 }
1148 rcu_read_unlock();
1149 }
1150
1151 if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) {
1152 err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
1153 (struct sockaddr *)usin, &ipc.addr);
1154 if (err)
1155 goto out_free;
1156 if (usin) {
1157 if (usin->sin_port == 0) {
/* BPF program set invalid port. Reject it. */
1159 err = -EINVAL;
1160 goto out_free;
1161 }
1162 daddr = usin->sin_addr.s_addr;
1163 dport = usin->sin_port;
1164 }
1165 }
1166
1167 saddr = ipc.addr;
1168 ipc.addr = faddr = daddr;
1169
1170 if (ipc.opt && ipc.opt->opt.srr) {
1171 if (!daddr) {
1172 err = -EINVAL;
1173 goto out_free;
1174 }
1175 faddr = ipc.opt->opt.faddr;
1176 connected = 0;
1177 }
1178 tos = get_rttos(&ipc, inet);
1179 if (sock_flag(sk, SOCK_LOCALROUTE) ||
1180 (msg->msg_flags & MSG_DONTROUTE) ||
1181 (ipc.opt && ipc.opt->opt.is_strictroute)) {
1182 tos |= RTO_ONLINK;
1183 connected = 0;
1184 }
1185
1186 if (ipv4_is_multicast(daddr)) {
1187 if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
1188 ipc.oif = inet->mc_index;
1189 if (!saddr)
1190 saddr = inet->mc_addr;
1191 connected = 0;
1192 } else if (!ipc.oif) {
1193 ipc.oif = inet->uc_index;
1194 } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
/* oif is set, packet is to local broadcast and
 * uc_index is set. oif is most likely set
 * by sk_bound_dev_if. If uc_index != oif check if the
 * oif is an L3 master and uc_index is an L3 slave.
 * If so, we want to allow the send using the uc_index.
 */
1201 if (ipc.oif != inet->uc_index &&
1202 ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
1203 inet->uc_index)) {
1204 ipc.oif = inet->uc_index;
1205 }
1206 }
1207
1208 if (connected)
1209 rt = (struct rtable *)sk_dst_check(sk, 0);
1210
1211 if (!rt) {
1212 struct net *net = sock_net(sk);
1213 __u8 flow_flags = inet_sk_flowi_flags(sk);
1214
1215 fl4 = &fl4_stack;
1216
1217 flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos,
1218 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1219 flow_flags,
1220 faddr, saddr, dport, inet->inet_sport,
1221 sk->sk_uid);
1222
1223 security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
1224 rt = ip_route_output_flow(net, fl4, sk);
1225 if (IS_ERR(rt)) {
1226 err = PTR_ERR(rt);
1227 rt = NULL;
1228 if (err == -ENETUNREACH)
1229 IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
1230 goto out;
1231 }
1232
1233 err = -EACCES;
1234 if ((rt->rt_flags & RTCF_BROADCAST) &&
1235 !sock_flag(sk, SOCK_BROADCAST))
1236 goto out;
1237 if (connected)
1238 sk_dst_set(sk, dst_clone(&rt->dst));
1239 }
1240
1241 if (msg->msg_flags&MSG_CONFIRM)
1242 goto do_confirm;
1243 back_from_confirm:
1244
1245 saddr = fl4->saddr;
1246 if (!ipc.addr)
1247 daddr = ipc.addr = fl4->daddr;
1248
/* Lockless fast path for the non-corking case. */
1250 if (!corkreq) {
1251 struct inet_cork cork;
1252
1253 skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
1254 sizeof(struct udphdr), &ipc, &rt,
1255 &cork, msg->msg_flags);
1256 err = PTR_ERR(skb);
1257 if (!IS_ERR_OR_NULL(skb))
1258 err = udp_send_skb(skb, fl4, &cork);
1259 goto out;
1260 }
1261
1262 lock_sock(sk);
1263 if (unlikely(up->pending)) {
/* The socket is already corked while preparing it. */
/* ... which is an evident application bug. --ANK */
1266 release_sock(sk);
1267
1268 net_dbg_ratelimited("socket already corked\n");
1269 err = -EINVAL;
1270 goto out;
1271 }

/*
 *	Now cork the socket to pend data.
 */
1275 fl4 = &inet->cork.fl.u.ip4;
1276 fl4->daddr = daddr;
1277 fl4->saddr = saddr;
1278 fl4->fl4_dport = dport;
1279 fl4->fl4_sport = inet->inet_sport;
1280 up->pending = AF_INET;
1281
1282 do_append_data:
1283 up->len += ulen;
1284 err = ip_append_data(sk, fl4, getfrag, msg, ulen,
1285 sizeof(struct udphdr), &ipc, &rt,
1286 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
1287 if (err)
1288 udp_flush_pending_frames(sk);
1289 else if (!corkreq)
1290 err = udp_push_pending_frames(sk);
1291 else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
1292 up->pending = 0;
1293 release_sock(sk);
1294
1295 out:
1296 ip_rt_put(rt);
1297 out_free:
1298 if (free)
1299 kfree(ipc.opt);
1300 if (!err)
1301 return len;
/*
 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
 * ENOBUFS might not be good (it's not tunable per se), but otherwise
 * we don't have a good statistic (IpOutDiscards but it can be too many
 * things).  We could add another new stat but at least for now that
 * seems like overkill.
 */
1309 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1310 UDP_INC_STATS(sock_net(sk),
1311 UDP_MIB_SNDBUFERRORS, is_udplite);
1312 }
1313 return err;
1314
1315 do_confirm:
1316 if (msg->msg_flags & MSG_PROBE)
1317 dst_confirm_neigh(&rt->dst, &fl4->daddr);
1318 if (!(msg->msg_flags&MSG_PROBE) || len)
1319 goto back_from_confirm;
1320 err = 0;
1321 goto out;
1322 }
1323 EXPORT_SYMBOL(udp_sendmsg);
1324
1325 int udp_sendpage(struct sock *sk, struct page *page, int offset,
1326 size_t size, int flags)
1327 {
1328 struct inet_sock *inet = inet_sk(sk);
1329 struct udp_sock *up = udp_sk(sk);
1330 int ret;
1331
1332 if (flags & MSG_SENDPAGE_NOTLAST)
1333 flags |= MSG_MORE;
1334
1335 if (!up->pending) {
1336 struct msghdr msg = { .msg_flags = flags|MSG_MORE };
1337
/* Call udp_sendmsg to specify destination address which
 * sendpage interface can't pass.
 * This will succeed only when the socket is connected.
 */
1342 ret = udp_sendmsg(sk, &msg, 0);
1343 if (ret < 0)
1344 return ret;
1345 }
1346
1347 lock_sock(sk);
1348
1349 if (unlikely(!up->pending)) {
1350 release_sock(sk);
1351
1352 net_dbg_ratelimited("cork failed\n");
1353 return -EINVAL;
1354 }
1355
1356 ret = ip_append_page(sk, &inet->cork.fl.u.ip4,
1357 page, offset, size, flags);
1358 if (ret == -EOPNOTSUPP) {
1359 release_sock(sk);
1360 return sock_no_sendpage(sk->sk_socket, page, offset,
1361 size, flags);
1362 }
1363 if (ret < 0) {
1364 udp_flush_pending_frames(sk);
1365 goto out;
1366 }
1367
1368 up->len += size;
1369 if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE)))
1370 ret = udp_push_pending_frames(sk);
1371 if (!ret)
1372 ret = size;
1373 out:
1374 release_sock(sk);
1375 return ret;
1376 }
1377
1378 #define UDP_SKB_IS_STATELESS 0x80000000

/* all head states (dst, sk, nf conntrack) except skb extensions are
 * cleared by udp_rcv().
 *
 * We need to preserve secpath, if present, to eventually process
 * IP_CMSG_PASSSEC at recvmsg() time.
 *
 * Other extensions can be cleared.
 */
1388 static bool udp_try_make_stateless(struct sk_buff *skb)
1389 {
1390 if (!skb_has_extensions(skb))
1391 return true;
1392
1393 if (!secpath_exists(skb)) {
1394 skb_ext_reset(skb);
1395 return true;
1396 }
1397
1398 return false;
1399 }
1400
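/* Cache truesize, len and checksum state in skb->dev_scratch, so that the
 * reader can access them without touching cold cache lines while holding
 * the receive queue lock.
 */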
1401 static void udp_set_dev_scratch(struct sk_buff *skb)
1402 {
1403 struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
1404
1405 BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long));
1406 scratch->_tsize_state = skb->truesize;
1407 #if BITS_PER_LONG == 64
1408 scratch->len = skb->len;
1409 scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
1410 scratch->is_linear = !skb_is_nonlinear(skb);
1411 #endif
1412 if (udp_try_make_stateless(skb))
1413 scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
1414 }
1415
1416 static void udp_skb_csum_unnecessary_set(struct sk_buff *skb)
1417 {
/* We come here after udp_lib_checksum_complete() returned 0.
 * This means that __skb_checksum_complete() might have
 * set skb->csum_valid to 1.
 * On 64bit platforms, we can set csum_unnecessary
 * to true, but only if the skb is not shared.
 */
1424 #if BITS_PER_LONG == 64
1425 if (!skb_shared(skb))
1426 udp_skb_scratch(skb)->csum_unnecessary = true;
1427 #endif
1428 }
1429
1430 static int udp_skb_truesize(struct sk_buff *skb)
1431 {
1432 return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
1433 }
1434
1435 static bool udp_skb_has_head_state(struct sk_buff *skb)
1436 {
1437 return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS);
1438 }
1439
/* fully reclaim rmem/fwd memory allocated for skb */
1441 static void udp_rmem_release(struct sock *sk, int size, int partial,
1442 bool rx_queue_lock_held)
1443 {
1444 struct udp_sock *up = udp_sk(sk);
1445 struct sk_buff_head *sk_queue;
1446 int amt;
1447
1448 if (likely(partial)) {
1449 up->forward_deficit += size;
1450 size = up->forward_deficit;
1451 if (size < (sk->sk_rcvbuf >> 2) &&
1452 !skb_queue_empty(&up->reader_queue))
1453 return;
1454 } else {
1455 size += up->forward_deficit;
1456 }
1457 up->forward_deficit = 0;
1458
/* acquire the sk_receive_queue for fwd allocated memory scheduling,
 * unless the caller already holds it
 */
1462 sk_queue = &sk->sk_receive_queue;
1463 if (!rx_queue_lock_held)
1464 spin_lock(&sk_queue->lock);
1465
1466
1467 sk->sk_forward_alloc += size;
1468 amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
1469 sk->sk_forward_alloc -= amt;
1470
1471 if (amt)
1472 __sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
1473
1474 atomic_sub(size, &sk->sk_rmem_alloc);
1475
/* this can save us from acquiring the rx queue lock on next receive */
1477 skb_queue_splice_tail_init(sk_queue, &up->reader_queue);
1478
1479 if (!rx_queue_lock_held)
1480 spin_unlock(&sk_queue->lock);
1481 }
1482
/* Note: called with reader_queue.lock held.
 * Instead of using skb->truesize here, find a copy of it in skb->dev_scratch
 * to avoid a cache line miss while receive_queue lock is held.
 * Look at __udp_enqueue_schedule_skb() to find where this copy is done.
 */
1488 void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
1489 {
1490 prefetch(&skb->data);
1491 udp_rmem_release(sk, udp_skb_truesize(skb), 1, false);
1492 }
1493 EXPORT_SYMBOL(udp_skb_destructor);
1494
/* as above, but the caller held the rx queue lock, too */
1496 static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb)
1497 {
1498 prefetch(&skb->data);
1499 udp_rmem_release(sk, udp_skb_truesize(skb), 1, true);
1500 }
1501
/* Idea of busylocks is to let producers grab an extra spinlock
 * to relieve pressure on the receive_queue spinlock shared by consumer.
 * Under flood, this means that only one producer can be in line
 * trying to acquire the receive_queue spinlock.
 * These busylock can be allocated on a per cpu manner, instead of a
 * per socket one (that would consume a cache line per socket)
 */
1509 static int udp_busylocks_log __read_mostly;
1510 static spinlock_t *udp_busylocks __read_mostly;
1511
1512 static spinlock_t *busylock_acquire(void *ptr)
1513 {
1514 spinlock_t *busy;
1515
1516 busy = udp_busylocks + hash_ptr(ptr, udp_busylocks_log);
1517 spin_lock(busy);
1518 return busy;
1519 }
1520
1521 static void busylock_release(spinlock_t *busy)
1522 {
1523 if (busy)
1524 spin_unlock(busy);
1525 }
1526
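/* Charge the skb to the socket's receive memory and queue it on
 * sk_receive_queue; drops and returns -ENOBUFS/-ENOMEM when the receive
 * buffer or the UDP memory accounting limits are exceeded.
 */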
1527 int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
1528 {
1529 struct sk_buff_head *list = &sk->sk_receive_queue;
1530 int rmem, delta, amt, err = -ENOMEM;
1531 spinlock_t *busy = NULL;
1532 int size;
1533
/* try to avoid the costly atomic add/sub pair when the receive
 * queue is full; always allow at least a packet
 */
1537 rmem = atomic_read(&sk->sk_rmem_alloc);
1538 if (rmem > sk->sk_rcvbuf)
1539 goto drop;
1540
/* Under mem pressure, it might be helpful to help udp_recvmsg()
 * having linear skbs :
 * - Reduce memory overhead and thus increase receive queue capacity
 * - Less cache line misses at copyout() time
 * - Less work at consume_skb() (less alien page frag freeing)
 */
1547 if (rmem > (sk->sk_rcvbuf >> 1)) {
1548 skb_condense(skb);
1549
1550 busy = busylock_acquire(sk);
1551 }
1552 size = skb->truesize;
1553 udp_set_dev_scratch(skb);
1554
/* we drop only if the receive buf is full and the receive
 * queue contains some other skb
 */
1558 rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
1559 if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
1560 goto uncharge_drop;
1561
1562 spin_lock(&list->lock);
1563 if (size >= sk->sk_forward_alloc) {
1564 amt = sk_mem_pages(size);
1565 delta = amt << PAGE_SHIFT;
1566 if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) {
1567 err = -ENOBUFS;
1568 spin_unlock(&list->lock);
1569 goto uncharge_drop;
1570 }
1571
1572 sk->sk_forward_alloc += delta;
1573 }
1574
1575 sk->sk_forward_alloc -= size;
1576
/* no need to setup a destructor, we will explicitly release the
 * forward allocated memory on dequeue
 */
1580 sock_skb_set_dropcount(sk, skb);
1581
1582 __skb_queue_tail(list, skb);
1583 spin_unlock(&list->lock);
1584
1585 if (!sock_flag(sk, SOCK_DEAD))
1586 sk->sk_data_ready(sk);
1587
1588 busylock_release(busy);
1589 return 0;
1590
1591 uncharge_drop:
1592 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1593
1594 drop:
1595 atomic_inc(&sk->sk_drops);
1596 busylock_release(busy);
1597 return err;
1598 }
1599 EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
1600
1601 void udp_destruct_sock(struct sock *sk)
1602 {
1603
1604 struct udp_sock *up = udp_sk(sk);
1605 unsigned int total = 0;
1606 struct sk_buff *skb;
1607
1608 skb_queue_splice_tail_init(&sk->sk_receive_queue, &up->reader_queue);
1609 while ((skb = __skb_dequeue(&up->reader_queue)) != NULL) {
1610 total += skb->truesize;
1611 kfree_skb(skb);
1612 }
1613 udp_rmem_release(sk, total, 0, true);
1614
1615 inet_sock_destruct(sk);
1616 }
1617 EXPORT_SYMBOL_GPL(udp_destruct_sock);
1618
1619 int udp_init_sock(struct sock *sk)
1620 {
1621 skb_queue_head_init(&udp_sk(sk)->reader_queue);
1622 sk->sk_destruct = udp_destruct_sock;
1623 return 0;
1624 }
1625 EXPORT_SYMBOL_GPL(udp_init_sock);
1626
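/* Release an skb that has been handed to user space: rewind the
 * SO_PEEK_OFF offset if needed and free the (mostly stateless) skb.
 */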
1627 void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
1628 {
1629 if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
1630 bool slow = lock_sock_fast(sk);
1631
1632 sk_peek_offset_bwd(sk, len);
1633 unlock_sock_fast(sk, slow);
1634 }
1635
1636 if (!skb_unref(skb))
1637 return;
1638
/* In the more common cases we cleared the head states previously,
 * see __udp_queue_rcv_skb().
 */
1642 if (unlikely(udp_skb_has_head_state(skb)))
1643 skb_release_head_state(skb);
1644 __consume_stateless_skb(skb);
1645 }
1646 EXPORT_SYMBOL_GPL(skb_consume_udp);
1647
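/* Walk @rcvq dropping packets that fail checksum validation (accounting
 * their truesize in @total) and return the first valid skb, if any.
 */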
1648 static struct sk_buff *__first_packet_length(struct sock *sk,
1649 struct sk_buff_head *rcvq,
1650 int *total)
1651 {
1652 struct sk_buff *skb;
1653
1654 while ((skb = skb_peek(rcvq)) != NULL) {
1655 if (udp_lib_checksum_complete(skb)) {
1656 __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
1657 IS_UDPLITE(sk));
1658 __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
1659 IS_UDPLITE(sk));
1660 atomic_inc(&sk->sk_drops);
1661 __skb_unlink(skb, rcvq);
1662 *total += skb->truesize;
1663 kfree_skb(skb);
1664 } else {
1665 udp_skb_csum_unnecessary_set(skb);
1666 break;
1667 }
1668 }
1669 return skb;
1670 }
1671
/**
 *	first_packet_length	- return length of first packet in receive queue
 *	@sk: socket
 *
 *	Drops all bad checksum frames, until a valid one is found.
 *	Returns the length of found skb, or -1 if none is found.
 */
1679 static int first_packet_length(struct sock *sk)
1680 {
1681 struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue;
1682 struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
1683 struct sk_buff *skb;
1684 int total = 0;
1685 int res;
1686
1687 spin_lock_bh(&rcvq->lock);
1688 skb = __first_packet_length(sk, rcvq, &total);
1689 if (!skb && !skb_queue_empty_lockless(sk_queue)) {
1690 spin_lock(&sk_queue->lock);
1691 skb_queue_splice_tail_init(sk_queue, rcvq);
1692 spin_unlock(&sk_queue->lock);
1693
1694 skb = __first_packet_length(sk, rcvq, &total);
1695 }
1696 res = skb ? skb->len : -1;
1697 if (total)
1698 udp_rmem_release(sk, total, 1, false);
1699 spin_unlock_bh(&rcvq->lock);
1700 return res;
1701 }
1702
/*
 *	IOCTL requests applicable to the UDP protocol
 */
1707 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
1708 {
1709 switch (cmd) {
1710 case SIOCOUTQ:
1711 {
1712 int amount = sk_wmem_alloc_get(sk);
1713
1714 return put_user(amount, (int __user *)arg);
1715 }
1716
1717 case SIOCINQ:
1718 {
1719 int amount = max_t(int, 0, first_packet_length(sk));
1720
1721 return put_user(amount, (int __user *)arg);
1722 }
1723
1724 default:
1725 return -ENOIOCTLCMD;
1726 }
1727
1728 return 0;
1729 }
1730 EXPORT_SYMBOL(udp_ioctl);
1731
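/* Dequeue one skb for recvmsg(): packets are consumed from the lockless
 * reader_queue first, refilled in bulk from sk_receive_queue when it runs
 * empty, optionally busy-polling or sleeping up to the socket timeout.
 */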
1732 struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
1733 int *off, int *err)
1734 {
1735 struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
1736 struct sk_buff_head *queue;
1737 struct sk_buff *last;
1738 long timeo;
1739 int error;
1740
1741 queue = &udp_sk(sk)->reader_queue;
1742 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1743 do {
1744 struct sk_buff *skb;
1745
1746 error = sock_error(sk);
1747 if (error)
1748 break;
1749
1750 error = -EAGAIN;
1751 do {
1752 spin_lock_bh(&queue->lock);
1753 skb = __skb_try_recv_from_queue(sk, queue, flags, off,
1754 err, &last);
1755 if (skb) {
1756 if (!(flags & MSG_PEEK))
1757 udp_skb_destructor(sk, skb);
1758 spin_unlock_bh(&queue->lock);
1759 return skb;
1760 }
1761
1762 if (skb_queue_empty_lockless(sk_queue)) {
1763 spin_unlock_bh(&queue->lock);
1764 goto busy_check;
1765 }
1766
1767
/* refill the reader queue and walk it again
 * keep both queues locked to avoid releasing
 * the sk_receive_queue lock if fwd memory scheduling
 * is needed.
 */
1772 spin_lock(&sk_queue->lock);
1773 skb_queue_splice_tail_init(sk_queue, queue);
1774
1775 skb = __skb_try_recv_from_queue(sk, queue, flags, off,
1776 err, &last);
1777 if (skb && !(flags & MSG_PEEK))
1778 udp_skb_dtor_locked(sk, skb);
1779 spin_unlock(&sk_queue->lock);
1780 spin_unlock_bh(&queue->lock);
1781 if (skb)
1782 return skb;
1783
1784 busy_check:
1785 if (!sk_can_busy_loop(sk))
1786 break;
1787
1788 sk_busy_loop(sk, flags & MSG_DONTWAIT);
1789 } while (!skb_queue_empty_lockless(sk_queue));
1790
/* sk_queue is empty, reader_queue may contain peeked packets */
1792 } while (timeo &&
1793 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
1794 &error, &timeo,
1795 (struct sk_buff *)sk_queue));
1796
1797 *err = error;
1798 return NULL;
1799 }
1800 EXPORT_SYMBOL(__skb_recv_udp);
1801
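/* Pull a single, checksum-verified skb off the receive queue and hand it
 * to @recv_actor; used by BPF based consumers such as sockmap.
 */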
1802 int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
1803 {
1804 int copied = 0;
1805
1806 while (1) {
1807 struct sk_buff *skb;
1808 int err, used;
1809
1810 skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
1811 if (!skb)
1812 return err;
1813
1814 if (udp_lib_checksum_complete(skb)) {
1815 __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
1816 IS_UDPLITE(sk));
1817 __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
1818 IS_UDPLITE(sk));
1819 atomic_inc(&sk->sk_drops);
1820 kfree_skb(skb);
1821 continue;
1822 }
1823
1824 WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
1825 used = recv_actor(sk, skb);
1826 if (used <= 0) {
1827 if (!copied)
1828 copied = used;
1829 kfree_skb(skb);
1830 break;
1831 } else if (used <= skb->len) {
1832 copied += used;
1833 }
1834
1835 kfree_skb(skb);
1836 break;
1837 }
1838
1839 return copied;
1840 }
1841 EXPORT_SYMBOL(udp_read_skb);
1842

/*
 *	This should be easy, if there is something there we
 *	return it, otherwise we block.
 */
1848 int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
1849 int *addr_len)
1850 {
1851 struct inet_sock *inet = inet_sk(sk);
1852 DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
1853 struct sk_buff *skb;
1854 unsigned int ulen, copied;
1855 int off, err, peeking = flags & MSG_PEEK;
1856 int is_udplite = IS_UDPLITE(sk);
1857 bool checksum_valid = false;
1858
1859 if (flags & MSG_ERRQUEUE)
1860 return ip_recv_error(sk, msg, len, addr_len);
1861
1862 try_again:
1863 off = sk_peek_offset(sk, flags);
1864 skb = __skb_recv_udp(sk, flags, &off, &err);
1865 if (!skb)
1866 return err;
1867
1868 ulen = udp_skb_len(skb);
1869 copied = len;
1870 if (copied > ulen - off)
1871 copied = ulen - off;
1872 else if (copied < ulen)
1873 msg->msg_flags |= MSG_TRUNC;
1874
/*
 * If checksum is needed at all, try to do it while copying the
 * data.  If the data is truncated, or if we only want a partial
 * coverage checksum (UDP-Lite), do it before the copy.
 */

1881 if (copied < ulen || peeking ||
1882 (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
1883 checksum_valid = udp_skb_csum_unnecessary(skb) ||
1884 !__udp_lib_checksum_complete(skb);
1885 if (!checksum_valid)
1886 goto csum_copy_err;
1887 }
1888
1889 if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
1890 if (udp_skb_is_linear(skb))
1891 err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
1892 else
1893 err = skb_copy_datagram_msg(skb, off, msg, copied);
1894 } else {
1895 err = skb_copy_and_csum_datagram_msg(skb, off, msg);
1896
1897 if (err == -EINVAL)
1898 goto csum_copy_err;
1899 }
1900
1901 if (unlikely(err)) {
1902 if (!peeking) {
1903 atomic_inc(&sk->sk_drops);
1904 UDP_INC_STATS(sock_net(sk),
1905 UDP_MIB_INERRORS, is_udplite);
1906 }
1907 kfree_skb(skb);
1908 return err;
1909 }
1910
1911 if (!peeking)
1912 UDP_INC_STATS(sock_net(sk),
1913 UDP_MIB_INDATAGRAMS, is_udplite);
1914
1915 sock_recv_cmsgs(msg, sk, skb);
1916
/* Copy the address. */
1918 if (sin) {
1919 sin->sin_family = AF_INET;
1920 sin->sin_port = udp_hdr(skb)->source;
1921 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
1922 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
1923 *addr_len = sizeof(*sin);
1924
1925 BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
1926 (struct sockaddr *)sin);
1927 }
1928
1929 if (udp_sk(sk)->gro_enabled)
1930 udp_cmsg_recv(msg, sk, skb);
1931
1932 if (inet->cmsg_flags)
1933 ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
1934
1935 err = copied;
1936 if (flags & MSG_TRUNC)
1937 err = ulen;
1938
1939 skb_consume_udp(sk, skb, peeking ? -err : err);
1940 return err;
1941
1942 csum_copy_err:
1943 if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
1944 udp_skb_destructor)) {
1945 UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
1946 UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1947 }
1948 kfree_skb(skb);
1949
/* starting over for a new packet, but check if we need to yield */
1951 cond_resched();
1952 msg->msg_flags &= ~MSG_TRUNC;
1953 goto try_again;
1954 }
1955
1956 int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1957 {
/* This check is replicated from __ip4_datagram_connect() and
 * intended to prevent BPF program called below from accessing bytes
 * that are out of the bound specified by user in addr_len.
 */
1962 if (addr_len < sizeof(struct sockaddr_in))
1963 return -EINVAL;
1964
1965 return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
1966 }
1967 EXPORT_SYMBOL(udp_pre_connect);
1968
1969 int __udp_disconnect(struct sock *sk, int flags)
1970 {
1971 struct inet_sock *inet = inet_sk(sk);
/*
 *	1003.1g - break association.
 */
1976 sk->sk_state = TCP_CLOSE;
1977 inet->inet_daddr = 0;
1978 inet->inet_dport = 0;
1979 sock_rps_reset_rxhash(sk);
1980 sk->sk_bound_dev_if = 0;
1981 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) {
1982 inet_reset_saddr(sk);
1983 if (sk->sk_prot->rehash &&
1984 (sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1985 sk->sk_prot->rehash(sk);
1986 }
1987
1988 if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
1989 sk->sk_prot->unhash(sk);
1990 inet->inet_sport = 0;
1991 }
1992 sk_dst_reset(sk);
1993 return 0;
1994 }
1995 EXPORT_SYMBOL(__udp_disconnect);
1996
1997 int udp_disconnect(struct sock *sk, int flags)
1998 {
1999 lock_sock(sk);
2000 __udp_disconnect(sk, flags);
2001 release_sock(sk);
2002 return 0;
2003 }
2004 EXPORT_SYMBOL(udp_disconnect);
2005
2006 void udp_lib_unhash(struct sock *sk)
2007 {
2008 if (sk_hashed(sk)) {
2009 struct udp_table *udptable = sk->sk_prot->h.udp_table;
2010 struct udp_hslot *hslot, *hslot2;
2011
2012 hslot = udp_hashslot(udptable, sock_net(sk),
2013 udp_sk(sk)->udp_port_hash);
2014 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
2015
2016 spin_lock_bh(&hslot->lock);
2017 if (rcu_access_pointer(sk->sk_reuseport_cb))
2018 reuseport_detach_sock(sk);
2019 if (sk_del_node_init_rcu(sk)) {
2020 hslot->count--;
2021 inet_sk(sk)->inet_num = 0;
2022 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
2023
2024 spin_lock(&hslot2->lock);
2025 hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
2026 hslot2->count--;
2027 spin_unlock(&hslot2->lock);
2028 }
2029 spin_unlock_bh(&hslot->lock);
2030 }
2031 }
2032 EXPORT_SYMBOL(udp_lib_unhash);
2033
/*
 * inet_rcv_saddr was changed, we must rehash secondary hash
 */
2037 void udp_lib_rehash(struct sock *sk, u16 newhash)
2038 {
2039 if (sk_hashed(sk)) {
2040 struct udp_table *udptable = sk->sk_prot->h.udp_table;
2041 struct udp_hslot *hslot, *hslot2, *nhslot2;
2042
2043 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
2044 nhslot2 = udp_hashslot2(udptable, newhash);
2045 udp_sk(sk)->udp_portaddr_hash = newhash;
2046
2047 if (hslot2 != nhslot2 ||
2048 rcu_access_pointer(sk->sk_reuseport_cb)) {
2049 hslot = udp_hashslot(udptable, sock_net(sk),
2050 udp_sk(sk)->udp_port_hash);
2051
2052 spin_lock_bh(&hslot->lock);
2053 if (rcu_access_pointer(sk->sk_reuseport_cb))
2054 reuseport_detach_sock(sk);
2055
2056 if (hslot2 != nhslot2) {
2057 spin_lock(&hslot2->lock);
2058 hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
2059 hslot2->count--;
2060 spin_unlock(&hslot2->lock);
2061
2062 spin_lock(&nhslot2->lock);
2063 hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
2064 &nhslot2->head);
2065 nhslot2->count++;
2066 spin_unlock(&nhslot2->lock);
2067 }
2068
2069 spin_unlock_bh(&hslot->lock);
2070 }
2071 }
2072 }
2073 EXPORT_SYMBOL(udp_lib_rehash);
2074
2075 void udp_v4_rehash(struct sock *sk)
2076 {
2077 u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
2078 inet_sk(sk)->inet_rcv_saddr,
2079 inet_sk(sk)->inet_num);
2080 udp_lib_rehash(sk, new_hash);
2081 }
2082
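/* Queue an skb on the socket receive queue, updating RPS/NAPI hints first
 * and bumping the relevant MIB counters if enqueueing fails.
 */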
2083 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
2084 {
2085 int rc;
2086
2087 if (inet_sk(sk)->inet_daddr) {
2088 sock_rps_save_rxhash(sk, skb);
2089 sk_mark_napi_id(sk, skb);
2090 sk_incoming_cpu_update(sk);
2091 } else {
2092 sk_mark_napi_id_once(sk, skb);
2093 }
2094
2095 rc = __udp_enqueue_schedule_skb(sk, skb);
2096 if (rc < 0) {
2097 int is_udplite = IS_UDPLITE(sk);
2098 int drop_reason;
2099
/* Note that an ENOMEM error is charged twice */
2101 if (rc == -ENOMEM) {
2102 UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
2103 is_udplite);
2104 drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
2105 } else {
2106 UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS,
2107 is_udplite);
2108 drop_reason = SKB_DROP_REASON_PROTO_MEM;
2109 }
2110 UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
2111 kfree_skb_reason(skb, drop_reason);
2112 trace_udp_fail_queue_rcv_skb(rc, sk);
2113 return -1;
2114 }
2115
2116 return 0;
2117 }
2118
/* returns:
 *  -1: error
 *   0: success
 *  >0: "udp encap" protocol resubmission
 *
 * Note that in the success and error cases, the skb is assumed to
 * have either been requeued or freed.
 */
2127 static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
2128 {
2129 int drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
2130 struct udp_sock *up = udp_sk(sk);
2131 int is_udplite = IS_UDPLITE(sk);
2132
/*
 *	Charge it to the socket, dropping if the queue is full.
 */
2136 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
2137 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
2138 goto drop;
2139 }
2140 nf_reset_ct(skb);
2141
2142 if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
2143 int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);

/*
 * This is an encapsulation socket so pass the skb to
 * the socket's udp_encap_rcv() hook. Otherwise, just
 * fall through and pass this up the UDP socket.
 * up->encap_rcv() returns the following value:
 * =0 if skb was successfully passed to the encap
 *    handler or was discarded by it.
 * >0 if skb should be passed on to UDP.
 * <0 if skb should be resubmitted as proto -N
 */

/* if we're overly short, let UDP handle it */
2157 encap_rcv = READ_ONCE(up->encap_rcv);
2158 if (encap_rcv) {
2159 int ret;
2160
/* Verify checksum before giving to encap */
2162 if (udp_lib_checksum_complete(skb))
2163 goto csum_error;
2164
2165 ret = encap_rcv(sk, skb);
2166 if (ret <= 0) {
2167 __UDP_INC_STATS(sock_net(sk),
2168 UDP_MIB_INDATAGRAMS,
2169 is_udplite);
2170 return -ret;
2171 }
2172 }
2173
/* FALLTHROUGH -- it's a UDP Packet */
2175 }
2176
/*
 *	UDP-Lite specific tests, ignored on UDP sockets
 */
2180 if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {

/*
 * MIB statistics other than incrementing the error count are
 * disabled for the following two types of errors: these depend
 * on the application settings, not on the functioning of the
 * protocol stack as such.
 *
 * RFC 3828 here recommends (sec 3.3): "There should also be a
 * way ... to ... at least let the receiving application block
 * delivery of packets with coverage values less than a value
 * provided by the application."
 */
2193 if (up->pcrlen == 0) {
2194 net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n",
2195 UDP_SKB_CB(skb)->cscov, skb->len);
2196 goto drop;
2197 }
/* The next case involves violating the min. coverage requested
 * by the receiver. This is subtle: if receiver wants x and x is
 * greater than the buffersize/MTU then receiver will complain
 * that it wants x while sender emits packets of smaller size y.
 * Therefore the above ...()->partial_cov statement is essential.
 */
2204 if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
2205 net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n",
2206 UDP_SKB_CB(skb)->cscov, up->pcrlen);
2207 goto drop;
2208 }
2209 }
2210
2211 prefetch(&sk->sk_rmem_alloc);
2212 if (rcu_access_pointer(sk->sk_filter) &&
2213 udp_lib_checksum_complete(skb))
2214 goto csum_error;
2215
2216 if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) {
2217 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
2218 goto drop;
2219 }
2220
2221 udp_csum_pull_header(skb);
2222
2223 ipv4_pktinfo_prepare(sk, skb);
2224 return __udp_queue_rcv_skb(sk, skb);
2225
2226 csum_error:
2227 drop_reason = SKB_DROP_REASON_UDP_CSUM;
2228 __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
2229 drop:
2230 __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
2231 atomic_inc(&sk->sk_drops);
2232 kfree_skb_reason(skb, drop_reason);
2233 return -1;
2234 }
2235
2236 static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
2237 {
2238 struct sk_buff *next, *segs;
2239 int ret;
2240
2241 if (likely(!udp_unexpected_gso(sk, skb)))
2242 return udp_queue_rcv_one_skb(sk, skb);
2243
2244 BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET);
2245 __skb_push(skb, -skb_mac_offset(skb));
2246 segs = udp_rcv_segment(sk, skb, true);
2247 skb_list_walk_safe(segs, skb, next) {
2248 __skb_pull(skb, skb_transport_offset(skb));
2249
2250 udp_post_segment_fix_csum(skb);
2251 ret = udp_queue_rcv_one_skb(sk, skb);
2252 if (ret > 0)
2253 ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret);
2254 }
2255 return 0;
2256 }
2257
/* For TCP sockets, sk_rx_dst is protected by socket lock
 * For UDP, we use xchg() to guard against concurrent changes.
 */
2261 bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
2262 {
2263 struct dst_entry *old;
2264
2265 if (dst_hold_safe(dst)) {
2266 old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst);
2267 dst_release(old);
2268 return old != dst;
2269 }
2270 return false;
2271 }
2272 EXPORT_SYMBOL(udp_sk_rx_dst_set);
2273
/*
 *	Multicasts and broadcasts go to each listener.
 *
 *	Note: called only from the BH handler context.
 */
2279 static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
2280 struct udphdr *uh,
2281 __be32 saddr, __be32 daddr,
2282 struct udp_table *udptable,
2283 int proto)
2284 {
2285 struct sock *sk, *first = NULL;
2286 unsigned short hnum = ntohs(uh->dest);
2287 struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
2288 unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
2289 unsigned int offset = offsetof(typeof(*sk), sk_node);
2290 int dif = skb->dev->ifindex;
2291 int sdif = inet_sdif(skb);
2292 struct hlist_node *node;
2293 struct sk_buff *nskb;
2294
2295 if (use_hash2) {
2296 hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
2297 udptable->mask;
2298 hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
2299 start_lookup:
2300 hslot = &udptable->hash2[hash2];
2301 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
2302 }
2303
2304 sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
2305 if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
2306 uh->source, saddr, dif, sdif, hnum))
2307 continue;
2308
2309 if (!first) {
2310 first = sk;
2311 continue;
2312 }
2313 nskb = skb_clone(skb, GFP_ATOMIC);
2314
2315 if (unlikely(!nskb)) {
2316 atomic_inc(&sk->sk_drops);
2317 __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
2318 IS_UDPLITE(sk));
2319 __UDP_INC_STATS(net, UDP_MIB_INERRORS,
2320 IS_UDPLITE(sk));
2321 continue;
2322 }
2323 if (udp_queue_rcv_skb(sk, nskb) > 0)
2324 consume_skb(nskb);
2325 }
2326
/* Also lookup *:port if we are using hash2 and haven't done so yet. */
2328 if (use_hash2 && hash2 != hash2_any) {
2329 hash2 = hash2_any;
2330 goto start_lookup;
2331 }
2332
2333 if (first) {
2334 if (udp_queue_rcv_skb(first, skb) > 0)
2335 consume_skb(skb);
2336 } else {
2337 kfree_skb(skb);
2338 __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
2339 proto == IPPROTO_UDPLITE);
2340 }
2341 return 0;
2342 }
2343
/* Initialize UDP checksum. If exited with zero value (success),
 * CHECKSUM_UNNECESSARY means, that no more checks are required,
 * otherwise, csum completion requires checksumming packet body,
 * including udp header and folding it to skb->csum.
 */
2349 static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
2350 int proto)
2351 {
2352 int err;
2353
2354 UDP_SKB_CB(skb)->partial_cov = 0;
2355 UDP_SKB_CB(skb)->cscov = skb->len;
2356
2357 if (proto == IPPROTO_UDPLITE) {
2358 err = udplite_checksum_init(skb, uh);
2359 if (err)
2360 return err;
2361
2362 if (UDP_SKB_CB(skb)->partial_cov) {
2363 skb->csum = inet_compute_pseudo(skb, proto);
2364 return 0;
2365 }
2366 }
2367
/* Note, we are only interested in != 0 or == 0, thus the
 * force to int.
 */
2371 err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
2372 inet_compute_pseudo);
2373 if (err)
2374 return err;
2375
2376 if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) {
/* If SW calculated the value, we know it's bad */
2378 if (skb->csum_complete_sw)
2379 return 1;

/* HW says the value is bad. Let's validate that.
 * skb->csum is no longer the full packet checksum,
 * so don't treat it as such.
 */
2385 skb_checksum_complete_unset(skb);
2386 }
2387
2388 return 0;
2389 }
2390
/* wrapper for udp_queue_rcv_skb taking care of csum conversion and
 * return code conversion for ip layer consumption
 */
2394 static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
2395 struct udphdr *uh)
2396 {
2397 int ret;
2398
2399 if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
2400 skb_checksum_try_convert(skb, IPPROTO_UDP, inet_compute_pseudo);
2401
2402 ret = udp_queue_rcv_skb(sk, skb);
2403
/* a return value > 0 means to resubmit the input, but
 * it wants the return to be -protocol, or 0
 */
2407 if (ret > 0)
2408 return -ret;
2409 return 0;
2410 }
2411
/*
 *	All we need to do is get the socket, and then do a checksum.
 */
2416 int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
2417 int proto)
2418 {
2419 struct sock *sk;
2420 struct udphdr *uh;
2421 unsigned short ulen;
2422 struct rtable *rt = skb_rtable(skb);
2423 __be32 saddr, daddr;
2424 struct net *net = dev_net(skb->dev);
2425 bool refcounted;
2426 int drop_reason;
2427
2428 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
2429
2430 /*
2431  * Validate the packet.
2432  */
2433 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
2434 goto drop;
2435
2436 uh = udp_hdr(skb);
2437 ulen = ntohs(uh->len);
2438 saddr = ip_hdr(skb)->saddr;
2439 daddr = ip_hdr(skb)->daddr;
2440
2441 if (ulen > skb->len)
2442 goto short_packet;
2443
2444 if (proto == IPPROTO_UDP) {
2445 /* UDP validates ulen. */
2446 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
2447 goto short_packet;
2448 uh = udp_hdr(skb);
2449 }
2450
2451 if (udp4_csum_init(skb, uh, proto))
2452 goto csum_error;
2453
2454 sk = skb_steal_sock(skb, &refcounted);
2455 if (sk) {
2456 struct dst_entry *dst = skb_dst(skb);
2457 int ret;
2458
2459 if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
2460 udp_sk_rx_dst_set(sk, dst);
2461
2462 ret = udp_unicast_rcv_skb(sk, skb, uh);
2463 if (refcounted)
2464 sock_put(sk);
2465 return ret;
2466 }
2467
2468 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
2469 return __udp4_lib_mcast_deliver(net, skb, uh,
2470 saddr, daddr, udptable, proto);
2471
2472 sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
2473 if (sk)
2474 return udp_unicast_rcv_skb(sk, skb, uh);
2475
2476 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
2477 goto drop;
2478 nf_reset_ct(skb);
2479
2480 /* No socket. Drop the packet silently if the checksum is wrong. */
2481 if (udp_lib_checksum_complete(skb))
2482 goto csum_error;
2483
2484 drop_reason = SKB_DROP_REASON_NO_SOCKET;
2485 __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
2486 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
2487
2488 /*
2489  * Hmm.  We got a UDP packet to a port to which we
2490  * don't want to listen.  Ignore it.
2491  */
2492 kfree_skb_reason(skb, drop_reason);
2493 return 0;
2494
2495 short_packet:
2496 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
2497 net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
2498 proto == IPPROTO_UDPLITE ? "Lite" : "",
2499 &saddr, ntohs(uh->source),
2500 ulen, skb->len,
2501 &daddr, ntohs(uh->dest));
2502 goto drop;
2503
2504 csum_error:
2505 /*
2506  * RFC1122: OK.  Discards the bad packet silently (as far as
2507  * the network is concerned, anyway) as per 4.1.3.4 (MUST).
2508  */
2509 drop_reason = SKB_DROP_REASON_UDP_CSUM;
2510 net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
2511 proto == IPPROTO_UDPLITE ? "Lite" : "",
2512 &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
2513 ulen);
2514 __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
2515 drop:
2516 __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
2517 kfree_skb_reason(skb, drop_reason);
2518 return 0;
2519 }
2520
2521 /* We can only early demux multicast if there is a single matching socket.
2522  * If more than one socket is found, return NULL.
2523  */
2524 static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
2525 __be16 loc_port, __be32 loc_addr,
2526 __be16 rmt_port, __be32 rmt_addr,
2527 int dif, int sdif)
2528 {
2529 struct sock *sk, *result;
2530 unsigned short hnum = ntohs(loc_port);
2531 unsigned int slot = udp_hashfn(net, hnum, udp_table.mask);
2532 struct udp_hslot *hslot = &udp_table.hash[slot];
2533
2534 /* Do not bother scanning a too-big list */
2535 if (hslot->count > 10)
2536 return NULL;
2537
2538 result = NULL;
2539 sk_for_each_rcu(sk, &hslot->head) {
2540 if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
2541 rmt_port, rmt_addr, dif, sdif, hnum)) {
2542 if (result)
2543 return NULL;
2544 result = sk;
2545 }
2546 }
2547
2548 return result;
2549 }
2550
2551 /* For unicast we should only early demux connected sockets or we can
2552  * break forwarding setups.  The chains here can be long, so only check
2553  * if the first socket is an exact match and if not move on.
2554  */
2555 static struct sock *__udp4_lib_demux_lookup(struct net *net,
2556 __be16 loc_port, __be32 loc_addr,
2557 __be16 rmt_port, __be32 rmt_addr,
2558 int dif, int sdif)
2559 {
2560 unsigned short hnum = ntohs(loc_port);
2561 unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
2562 unsigned int slot2 = hash2 & udp_table.mask;
2563 struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
2564 INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
2565 const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
2566 struct sock *sk;
2567
2568 udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
2569 if (inet_match(net, sk, acookie, ports, dif, sdif))
2570 return sk;
2571 /* Only check the first socket in the chain */
2572 break;
2573 }
2574 return NULL;
2575 }
2576
2577 int udp_v4_early_demux(struct sk_buff *skb)
2578 {
2579 struct net *net = dev_net(skb->dev);
2580 struct in_device *in_dev = NULL;
2581 const struct iphdr *iph;
2582 const struct udphdr *uh;
2583 struct sock *sk = NULL;
2584 struct dst_entry *dst;
2585 int dif = skb->dev->ifindex;
2586 int sdif = inet_sdif(skb);
2587 int ours;
2588
2589 /* validate the packet */
2590 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
2591 return 0;
2592
2593 iph = ip_hdr(skb);
2594 uh = udp_hdr(skb);
2595
2596 if (skb->pkt_type == PACKET_MULTICAST) {
2597 in_dev = __in_dev_get_rcu(skb->dev);
2598
2599 if (!in_dev)
2600 return 0;
2601
2602 ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
2603 iph->protocol);
2604 if (!ours)
2605 return 0;
2606
2607 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
2608 uh->source, iph->saddr,
2609 dif, sdif);
2610 } else if (skb->pkt_type == PACKET_HOST) {
2611 sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
2612 uh->source, iph->saddr, dif, sdif);
2613 }
2614
2615 if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
2616 return 0;
2617
2618 skb->sk = sk;
2619 skb->destructor = sock_efree;
2620 dst = rcu_dereference(sk->sk_rx_dst);
2621
2622 if (dst)
2623 dst = dst_check(dst, 0);
2624 if (dst) {
2625 u32 itag = 0;
2626
2627 /* set noref for now.
2628  * any place which wants to hold dst has to call
2629  * dst_hold_safe()
2630  */
2631 skb_dst_set_noref(skb, dst);
2632
2633 /* for unconnected multicast sockets we need to validate
2634  * the source on each packet
2635  */
2636 if (!inet_sk(sk)->inet_daddr && in_dev)
2637 return ip_mc_validate_source(skb, iph->daddr,
2638 iph->saddr,
2639 iph->tos & IPTOS_RT_MASK,
2640 skb->dev, in_dev, &itag);
2641 }
2642 return 0;
2643 }
2644
2645 int udp_rcv(struct sk_buff *skb)
2646 {
2647 return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
2648 }
2649
2650 void udp_destroy_sock(struct sock *sk)
2651 {
2652 struct udp_sock *up = udp_sk(sk);
2653 bool slow = lock_sock_fast(sk);
2654
2655 /* protects from races with udp_abort() */
2656 sock_set_flag(sk, SOCK_DEAD);
2657 udp_flush_pending_frames(sk);
2658 unlock_sock_fast(sk, slow);
2659 if (static_branch_unlikely(&udp_encap_needed_key)) {
2660 if (up->encap_type) {
2661 void (*encap_destroy)(struct sock *sk);
2662 encap_destroy = READ_ONCE(up->encap_destroy);
2663 if (encap_destroy)
2664 encap_destroy(sk);
2665 }
2666 if (up->encap_enabled)
2667 static_branch_dec(&udp_encap_needed_key);
2668 }
2669 }
2670
2671 /*
2672  * Socket option code for UDP
2673  */
2674 int udp_lib_setsockopt(struct sock *sk, int level, int optname,
2675 sockptr_t optval, unsigned int optlen,
2676 int (*push_pending_frames)(struct sock *))
2677 {
2678 struct udp_sock *up = udp_sk(sk);
2679 int val, valbool;
2680 int err = 0;
2681 int is_udplite = IS_UDPLITE(sk);
2682
2683 if (optlen < sizeof(int))
2684 return -EINVAL;
2685
2686 if (copy_from_sockptr(&val, optval, sizeof(val)))
2687 return -EFAULT;
2688
2689 valbool = val ? 1 : 0;
2690
2691 switch (optname) {
2692 case UDP_CORK:
2693 if (val != 0) {
2694 WRITE_ONCE(up->corkflag, 1);
2695 } else {
2696 WRITE_ONCE(up->corkflag, 0);
2697 lock_sock(sk);
2698 push_pending_frames(sk);
2699 release_sock(sk);
2700 }
2701 break;
2702
2703 case UDP_ENCAP:
2704 switch (val) {
2705 case 0:
2706 #ifdef CONFIG_XFRM
2707 case UDP_ENCAP_ESPINUDP:
2708 case UDP_ENCAP_ESPINUDP_NON_IKE:
2709 #if IS_ENABLED(CONFIG_IPV6)
2710 if (sk->sk_family == AF_INET6)
2711 up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
2712 else
2713 #endif
2714 up->encap_rcv = xfrm4_udp_encap_rcv;
2715 #endif
2716 fallthrough;
2717 case UDP_ENCAP_L2TPINUDP:
2718 up->encap_type = val;
2719 lock_sock(sk);
2720 udp_tunnel_encap_enable(sk->sk_socket);
2721 release_sock(sk);
2722 break;
2723 default:
2724 err = -ENOPROTOOPT;
2725 break;
2726 }
2727 break;
2728
2729 case UDP_NO_CHECK6_TX:
2730 up->no_check6_tx = valbool;
2731 break;
2732
2733 case UDP_NO_CHECK6_RX:
2734 up->no_check6_rx = valbool;
2735 break;
2736
2737 case UDP_SEGMENT:
2738 if (val < 0 || val > USHRT_MAX)
2739 return -EINVAL;
2740 WRITE_ONCE(up->gso_size, val);
2741 break;
2742
2743 case UDP_GRO:
2744 lock_sock(sk);
2745
2746 /* when enabling GRO, accept the related GSO packet type */
2747 if (valbool)
2748 udp_tunnel_encap_enable(sk->sk_socket);
2749 up->gro_enabled = valbool;
2750 up->accept_udp_l4 = valbool;
2751 release_sock(sk);
2752 break;
2753
2754 /*
2755  * UDP-Lite's partial checksum coverage (RFC 3828).
2756  */
2757 /* The sender sets the actual checksum coverage length via this option.
2758  * The case coverage > packet length is handled by the send module. */
2759 case UDPLITE_SEND_CSCOV:
2760 if (!is_udplite)
2761 return -ENOPROTOOPT;
2762 if (val != 0 && val < 8)
2763 val = 8;
2764 else if (val > USHRT_MAX)
2765 val = USHRT_MAX;
2766 up->pcslen = val;
2767 up->pcflag |= UDPLITE_SEND_CC;
2768 break;
2769
2770 /* The receiver specifies a minimum checksum coverage value. To make
2771  * sense, this should be set to at least 8 (as done below). If zero is
2772  * used, this again means full checksum coverage. */
2773 case UDPLITE_RECV_CSCOV:
2774 if (!is_udplite)
2775 return -ENOPROTOOPT;
2776 if (val != 0 && val < 8)
2777 val = 8;
2778 else if (val > USHRT_MAX)
2779 val = USHRT_MAX;
2780 up->pcrlen = val;
2781 up->pcflag |= UDPLITE_RECV_CC;
2782 break;
2783
2784 default:
2785 err = -ENOPROTOOPT;
2786 break;
2787 }
2788
2789 return err;
2790 }
2791 EXPORT_SYMBOL(udp_lib_setsockopt);
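/* Illustrative userspace sketch (not part of this file's build): how the
 * UDP_SEGMENT and UDP_GRO options handled above are typically enabled from an
 * application.  The option constants come from the UAPI header <linux/udp.h>;
 * the 1400-byte segment size is an arbitrary example value and error handling
 * is omitted.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <linux/udp.h>		// UDP_SEGMENT, UDP_GRO
 *
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *	int gso_size = 1400;		// payload bytes carried by each segment
 *	int one = 1;
 *
 *	// split large sendmsg() payloads into gso_size-byte UDP segments
 *	setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT, &gso_size, sizeof(gso_size));
 *	// let the kernel coalesce trains of equal-size received segments
 *	setsockopt(fd, IPPROTO_UDP, UDP_GRO, &one, sizeof(one));
 */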
2792
2793 int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
2794 unsigned int optlen)
2795 {
2796 if (level == SOL_UDP || level == SOL_UDPLITE)
2797 return udp_lib_setsockopt(sk, level, optname,
2798 optval, optlen,
2799 udp_push_pending_frames);
2800 return ip_setsockopt(sk, level, optname, optval, optlen);
2801 }
2802
2803 int udp_lib_getsockopt(struct sock *sk, int level, int optname,
2804 char __user *optval, int __user *optlen)
2805 {
2806 struct udp_sock *up = udp_sk(sk);
2807 int val, len;
2808
2809 if (get_user(len, optlen))
2810 return -EFAULT;
2811
2812 len = min_t(unsigned int, len, sizeof(int));
2813
2814 if (len < 0)
2815 return -EINVAL;
2816
2817 switch (optname) {
2818 case UDP_CORK:
2819 val = READ_ONCE(up->corkflag);
2820 break;
2821
2822 case UDP_ENCAP:
2823 val = up->encap_type;
2824 break;
2825
2826 case UDP_NO_CHECK6_TX:
2827 val = up->no_check6_tx;
2828 break;
2829
2830 case UDP_NO_CHECK6_RX:
2831 val = up->no_check6_rx;
2832 break;
2833
2834 case UDP_SEGMENT:
2835 val = READ_ONCE(up->gso_size);
2836 break;
2837
2838 case UDP_GRO:
2839 val = up->gro_enabled;
2840 break;
2841
2842 /* The following two cannot be changed on UDP sockets; the return is
2843  * always 0 (which corresponds to the full checksum coverage of UDP). */
2844 case UDPLITE_SEND_CSCOV:
2845 val = up->pcslen;
2846 break;
2847
2848 case UDPLITE_RECV_CSCOV:
2849 val = up->pcrlen;
2850 break;
2851
2852 default:
2853 return -ENOPROTOOPT;
2854 }
2855
2856 if (put_user(len, optlen))
2857 return -EFAULT;
2858 if (copy_to_user(optval, &val, len))
2859 return -EFAULT;
2860 return 0;
2861 }
2862 EXPORT_SYMBOL(udp_lib_getsockopt);
2863
2864 int udp_getsockopt(struct sock *sk, int level, int optname,
2865 char __user *optval, int __user *optlen)
2866 {
2867 if (level == SOL_UDP || level == SOL_UDPLITE)
2868 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
2869 return ip_getsockopt(sk, level, optname, optval, optlen);
2870 }
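/* Illustrative userspace sketch (not part of this file's build): the
 * UDPLITE_SEND_CSCOV / UDPLITE_RECV_CSCOV cases above implement the partial
 * checksum coverage of RFC 3828 for UDP-Lite sockets.  The coverage value
 * counts bytes from the start of the UDP-Lite header, so 20 below means the
 * 8-byte header plus the first 12 payload bytes; as seen above, non-zero
 * values below 8 are raised to 8.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <linux/udp.h>		// UDPLITE_SEND_CSCOV, UDPLITE_RECV_CSCOV
 *
 *	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);
 *	int cscov = 20;
 *
 *	// checksum only the first 20 bytes of each sent datagram
 *	setsockopt(fd, IPPROTO_UDPLITE, UDPLITE_SEND_CSCOV, &cscov, sizeof(cscov));
 *	// require received datagrams to cover at least the first 20 bytes
 *	setsockopt(fd, IPPROTO_UDPLITE, UDPLITE_RECV_CSCOV, &cscov, sizeof(cscov));
 */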
2871
2872 /**
2873  * udp_poll - wait for a UDP event.
2874  * @file: file struct
2875  * @sock: socket
2876  * @wait: poll table
2877  *
2878  * This is the same as datagram poll, except for the special case of
2879  * blocking sockets. If an application is using a blocking fd
2880  * and a packet with a checksum error is in the queue,
2881  * then it could get a return from select indicating data available
2882  * but then block when reading it. Add special case code to work
2883  * around these arguably broken applications.
2884  */
2885 __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
2886 {
2887 __poll_t mask = datagram_poll(file, sock, wait);
2888 struct sock *sk = sock->sk;
2889
2890 if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
2891 mask |= EPOLLIN | EPOLLRDNORM;
2892
2893 /* Check for false positives due to checksum errors */
2894 if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
2895 !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
2896 mask &= ~(EPOLLIN | EPOLLRDNORM);
2897
2898 /* psock ingress_msg queue should not contain any bad checksum frames */
2899 if (sk_is_readable(sk))
2900 mask |= EPOLLIN | EPOLLRDNORM;
2901 return mask;
2902
2903 }
2904 EXPORT_SYMBOL(udp_poll);
2905
2906 int udp_abort(struct sock *sk, int err)
2907 {
2908 lock_sock(sk);
2909
2910 /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
2911  * with close()
2912  */
2913 if (sock_flag(sk, SOCK_DEAD))
2914 goto out;
2915
2916 sk->sk_err = err;
2917 sk_error_report(sk);
2918 __udp_disconnect(sk, 0);
2919
2920 out:
2921 release_sock(sk);
2922
2923 return 0;
2924 }
2925 EXPORT_SYMBOL_GPL(udp_abort);
2926
2927 struct proto udp_prot = {
2928 .name = "UDP",
2929 .owner = THIS_MODULE,
2930 .close = udp_lib_close,
2931 .pre_connect = udp_pre_connect,
2932 .connect = ip4_datagram_connect,
2933 .disconnect = udp_disconnect,
2934 .ioctl = udp_ioctl,
2935 .init = udp_init_sock,
2936 .destroy = udp_destroy_sock,
2937 .setsockopt = udp_setsockopt,
2938 .getsockopt = udp_getsockopt,
2939 .sendmsg = udp_sendmsg,
2940 .recvmsg = udp_recvmsg,
2941 .sendpage = udp_sendpage,
2942 .release_cb = ip4_datagram_release_cb,
2943 .hash = udp_lib_hash,
2944 .unhash = udp_lib_unhash,
2945 .rehash = udp_v4_rehash,
2946 .get_port = udp_v4_get_port,
2947 .put_port = udp_lib_unhash,
2948 #ifdef CONFIG_BPF_SYSCALL
2949 .psock_update_sk_prot = udp_bpf_update_proto,
2950 #endif
2951 .memory_allocated = &udp_memory_allocated,
2952 .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
2953
2954 .sysctl_mem = sysctl_udp_mem,
2955 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
2956 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
2957 .obj_size = sizeof(struct udp_sock),
2958 .h.udp_table = &udp_table,
2959 .diag_destroy = udp_abort,
2960 };
2961 EXPORT_SYMBOL(udp_prot);
2962
2963 /* ------------------------------------------------------------------------ */
2964 #ifdef CONFIG_PROC_FS
2965
2966 static struct sock *udp_get_first(struct seq_file *seq, int start)
2967 {
2968 struct sock *sk;
2969 struct udp_seq_afinfo *afinfo;
2970 struct udp_iter_state *state = seq->private;
2971 struct net *net = seq_file_net(seq);
2972
2973 if (state->bpf_seq_afinfo)
2974 afinfo = state->bpf_seq_afinfo;
2975 else
2976 afinfo = pde_data(file_inode(seq->file));
2977
2978 for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
2979 ++state->bucket) {
2980 struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
2981
2982 if (hlist_empty(&hslot->head))
2983 continue;
2984
2985 spin_lock_bh(&hslot->lock);
2986 sk_for_each(sk, &hslot->head) {
2987 if (!net_eq(sock_net(sk), net))
2988 continue;
2989 if (afinfo->family == AF_UNSPEC ||
2990 sk->sk_family == afinfo->family)
2991 goto found;
2992 }
2993 spin_unlock_bh(&hslot->lock);
2994 }
2995 sk = NULL;
2996 found:
2997 return sk;
2998 }
2999
3000 static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
3001 {
3002 struct udp_seq_afinfo *afinfo;
3003 struct udp_iter_state *state = seq->private;
3004 struct net *net = seq_file_net(seq);
3005
3006 if (state->bpf_seq_afinfo)
3007 afinfo = state->bpf_seq_afinfo;
3008 else
3009 afinfo = pde_data(file_inode(seq->file));
3010
3011 do {
3012 sk = sk_next(sk);
3013 } while (sk && (!net_eq(sock_net(sk), net) ||
3014 (afinfo->family != AF_UNSPEC &&
3015 sk->sk_family != afinfo->family)));
3016
3017 if (!sk) {
3018 if (state->bucket <= afinfo->udp_table->mask)
3019 spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
3020 return udp_get_first(seq, state->bucket + 1);
3021 }
3022 return sk;
3023 }
3024
3025 static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
3026 {
3027 struct sock *sk = udp_get_first(seq, 0);
3028
3029 if (sk)
3030 while (pos && (sk = udp_get_next(seq, sk)) != NULL)
3031 --pos;
3032 return pos ? NULL : sk;
3033 }
3034
3035 void *udp_seq_start(struct seq_file *seq, loff_t *pos)
3036 {
3037 struct udp_iter_state *state = seq->private;
3038 state->bucket = MAX_UDP_PORTS;
3039
3040 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
3041 }
3042 EXPORT_SYMBOL(udp_seq_start);
3043
3044 void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3045 {
3046 struct sock *sk;
3047
3048 if (v == SEQ_START_TOKEN)
3049 sk = udp_get_idx(seq, 0);
3050 else
3051 sk = udp_get_next(seq, v);
3052
3053 ++*pos;
3054 return sk;
3055 }
3056 EXPORT_SYMBOL(udp_seq_next);
3057
3058 void udp_seq_stop(struct seq_file *seq, void *v)
3059 {
3060 struct udp_seq_afinfo *afinfo;
3061 struct udp_iter_state *state = seq->private;
3062
3063 if (state->bpf_seq_afinfo)
3064 afinfo = state->bpf_seq_afinfo;
3065 else
3066 afinfo = pde_data(file_inode(seq->file));
3067
3068 if (state->bucket <= afinfo->udp_table->mask)
3069 spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
3070 }
3071 EXPORT_SYMBOL(udp_seq_stop);
3072
3073 /* ------------------------------------------------------------------------ */
3074 static void udp4_format_sock(struct sock *sp, struct seq_file *f,
3075 int bucket)
3076 {
3077 struct inet_sock *inet = inet_sk(sp);
3078 __be32 dest = inet->inet_daddr;
3079 __be32 src = inet->inet_rcv_saddr;
3080 __u16 destp = ntohs(inet->inet_dport);
3081 __u16 srcp = ntohs(inet->inet_sport);
3082
3083 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
3084 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u",
3085 bucket, src, srcp, dest, destp, sp->sk_state,
3086 sk_wmem_alloc_get(sp),
3087 udp_rqueue_get(sp),
3088 0, 0L, 0,
3089 from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
3090 0, sock_i_ino(sp),
3091 refcount_read(&sp->sk_refcnt), sp,
3092 atomic_read(&sp->sk_drops));
3093 }
3094
3095 int udp4_seq_show(struct seq_file *seq, void *v)
3096 {
3097 seq_setwidth(seq, 127);
3098 if (v == SEQ_START_TOKEN)
3099 seq_puts(seq, " sl local_address rem_address st tx_queue "
3100 "rx_queue tr tm->when retrnsmt uid timeout "
3101 "inode ref pointer drops");
3102 else {
3103 struct udp_iter_state *state = seq->private;
3104
3105 udp4_format_sock(v, seq, state->bucket);
3106 }
3107 seq_pad(seq, '\n');
3108 return 0;
3109 }
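/* Illustrative userspace sketch (not part of this file's build): the rows
 * produced by udp4_seq_show() are exported through procfs as /proc/net/udp
 * (see udp4_proc_init_net() below) and can be consumed as plain text, which
 * is how tools such as netstat read the socket table.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char line[256];
 *		FILE *f = fopen("/proc/net/udp", "r");
 *
 *		if (!f)
 *			return 1;
 *		while (fgets(line, sizeof(line), f))
 *			fputs(line, stdout);	// header line, then one line per socket
 *		fclose(f);
 *		return 0;
 *	}
 */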
3110
3111 #ifdef CONFIG_BPF_SYSCALL
3112 struct bpf_iter__udp {
3113 __bpf_md_ptr(struct bpf_iter_meta *, meta);
3114 __bpf_md_ptr(struct udp_sock *, udp_sk);
3115 uid_t uid __aligned(8);
3116 int bucket __aligned(8);
3117 };
3118
3119 static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3120 struct udp_sock *udp_sk, uid_t uid, int bucket)
3121 {
3122 struct bpf_iter__udp ctx;
3123
3124 meta->seq_num--;
3125 ctx.meta = meta;
3126 ctx.udp_sk = udp_sk;
3127 ctx.uid = uid;
3128 ctx.bucket = bucket;
3129 return bpf_iter_run_prog(prog, &ctx);
3130 }
3131
3132 static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
3133 {
3134 struct udp_iter_state *state = seq->private;
3135 struct bpf_iter_meta meta;
3136 struct bpf_prog *prog;
3137 struct sock *sk = v;
3138 uid_t uid;
3139
3140 if (v == SEQ_START_TOKEN)
3141 return 0;
3142
3143 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3144 meta.seq = seq;
3145 prog = bpf_iter_get_info(&meta, false);
3146 return udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
3147 }
3148
3149 static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
3150 {
3151 struct bpf_iter_meta meta;
3152 struct bpf_prog *prog;
3153
3154 if (!v) {
3155 meta.seq = seq;
3156 prog = bpf_iter_get_info(&meta, true);
3157 if (prog)
3158 (void)udp_prog_seq_show(prog, &meta, v, 0, 0);
3159 }
3160
3161 udp_seq_stop(seq, v);
3162 }
3163
3164 static const struct seq_operations bpf_iter_udp_seq_ops = {
3165 .start = udp_seq_start,
3166 .next = udp_seq_next,
3167 .stop = bpf_iter_udp_seq_stop,
3168 .show = bpf_iter_udp_seq_show,
3169 };
3170 #endif
3171
3172 const struct seq_operations udp_seq_ops = {
3173 .start = udp_seq_start,
3174 .next = udp_seq_next,
3175 .stop = udp_seq_stop,
3176 .show = udp4_seq_show,
3177 };
3178 EXPORT_SYMBOL(udp_seq_ops);
3179
3180 static struct udp_seq_afinfo udp4_seq_afinfo = {
3181 .family = AF_INET,
3182 .udp_table = &udp_table,
3183 };
3184
3185 static int __net_init udp4_proc_init_net(struct net *net)
3186 {
3187 if (!proc_create_net_data("udp", 0444, net->proc_net, &udp_seq_ops,
3188 sizeof(struct udp_iter_state), &udp4_seq_afinfo))
3189 return -ENOMEM;
3190 return 0;
3191 }
3192
3193 static void __net_exit udp4_proc_exit_net(struct net *net)
3194 {
3195 remove_proc_entry("udp", net->proc_net);
3196 }
3197
3198 static struct pernet_operations udp4_net_ops = {
3199 .init = udp4_proc_init_net,
3200 .exit = udp4_proc_exit_net,
3201 };
3202
3203 int __init udp4_proc_init(void)
3204 {
3205 return register_pernet_subsys(&udp4_net_ops);
3206 }
3207
3208 void udp4_proc_exit(void)
3209 {
3210 unregister_pernet_subsys(&udp4_net_ops);
3211 }
3212 #endif
3213
3214 static __initdata unsigned long uhash_entries;
3215 static int __init set_uhash_entries(char *str)
3216 {
3217 ssize_t ret;
3218
3219 if (!str)
3220 return 0;
3221
3222 ret = kstrtoul(str, 0, &uhash_entries);
3223 if (ret)
3224 return 0;
3225
3226 if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
3227 uhash_entries = UDP_HTABLE_SIZE_MIN;
3228 return 1;
3229 }
3230 __setup("uhash_entries=", set_uhash_entries);
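/* Boot-time usage note: the UDP hash table size can be overridden on the
 * kernel command line, e.g. "uhash_entries=65536".  Non-zero values below
 * UDP_HTABLE_SIZE_MIN are raised to the minimum above, and the final size is
 * chosen by alloc_large_system_hash() in udp_table_init() below.
 */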
3231
3232 void __init udp_table_init(struct udp_table *table, const char *name)
3233 {
3234 unsigned int i;
3235
3236 table->hash = alloc_large_system_hash(name,
3237 2 * sizeof(struct udp_hslot),
3238 uhash_entries,
3239 21,
3240 0,
3241 &table->log,
3242 &table->mask,
3243 UDP_HTABLE_SIZE_MIN,
3244 64 * 1024);
3245
3246 table->hash2 = table->hash + (table->mask + 1);
3247 for (i = 0; i <= table->mask; i++) {
3248 INIT_HLIST_HEAD(&table->hash[i].head);
3249 table->hash[i].count = 0;
3250 spin_lock_init(&table->hash[i].lock);
3251 }
3252 for (i = 0; i <= table->mask; i++) {
3253 INIT_HLIST_HEAD(&table->hash2[i].head);
3254 table->hash2[i].count = 0;
3255 spin_lock_init(&table->hash2[i].lock);
3256 }
3257 }
3258
3259 u32 udp_flow_hashrnd(void)
3260 {
3261 static u32 hashrnd __read_mostly;
3262
3263 net_get_random_once(&hashrnd, sizeof(hashrnd));
3264
3265 return hashrnd;
3266 }
3267 EXPORT_SYMBOL(udp_flow_hashrnd);
3268
3269 static int __net_init udp_sysctl_init(struct net *net)
3270 {
3271 net->ipv4.sysctl_udp_rmem_min = PAGE_SIZE;
3272 net->ipv4.sysctl_udp_wmem_min = PAGE_SIZE;
3273
3274 #ifdef CONFIG_NET_L3_MASTER_DEV
3275 net->ipv4.sysctl_udp_l3mdev_accept = 0;
3276 #endif
3277
3278 return 0;
3279 }
3280
3281 static struct pernet_operations __net_initdata udp_sysctl_ops = {
3282 .init = udp_sysctl_init,
3283 };
3284
3285 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3286 DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
3287 struct udp_sock *udp_sk, uid_t uid, int bucket)
3288
3289 static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
3290 {
3291 struct udp_iter_state *st = priv_data;
3292 struct udp_seq_afinfo *afinfo;
3293 int ret;
3294
3295 afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
3296 if (!afinfo)
3297 return -ENOMEM;
3298
3299 afinfo->family = AF_UNSPEC;
3300 afinfo->udp_table = &udp_table;
3301 st->bpf_seq_afinfo = afinfo;
3302 ret = bpf_iter_init_seq_net(priv_data, aux);
3303 if (ret)
3304 kfree(afinfo);
3305 return ret;
3306 }
3307
3308 static void bpf_iter_fini_udp(void *priv_data)
3309 {
3310 struct udp_iter_state *st = priv_data;
3311
3312 kfree(st->bpf_seq_afinfo);
3313 bpf_iter_fini_seq_net(priv_data);
3314 }
3315
3316 static const struct bpf_iter_seq_info udp_seq_info = {
3317 .seq_ops = &bpf_iter_udp_seq_ops,
3318 .init_seq_private = bpf_iter_init_udp,
3319 .fini_seq_private = bpf_iter_fini_udp,
3320 .seq_priv_size = sizeof(struct udp_iter_state),
3321 };
3322
3323 static struct bpf_iter_reg udp_reg_info = {
3324 .target = "udp",
3325 .ctx_arg_info_size = 1,
3326 .ctx_arg_info = {
3327 { offsetof(struct bpf_iter__udp, udp_sk),
3328 PTR_TO_BTF_ID_OR_NULL },
3329 },
3330 .seq_info = &udp_seq_info,
3331 };
3332
3333 static void __init bpf_iter_register(void)
3334 {
3335 udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP];
3336 if (bpf_iter_reg_target(&udp_reg_info))
3337 pr_warn("Warning: could not register bpf iterator udp\n");
3338 }
3339 #endif
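/* Illustrative BPF sketch (assumptions: a libbpf build with a generated
 * vmlinux.h and the BPF_SEQ_PRINTF macro from <bpf/bpf_tracing.h>): a minimal
 * program for the "udp" iterator target registered above.  Once attached via
 * a bpf_iter link, it receives one struct bpf_iter__udp context per socket,
 * mirroring what bpf_iter_udp_seq_show() passes in.
 *
 *	#include "vmlinux.h"
 *	#include <bpf/bpf_helpers.h>
 *	#include <bpf/bpf_tracing.h>
 *
 *	char _license[] SEC("license") = "GPL";
 *
 *	SEC("iter/udp")
 *	int dump_udp(struct bpf_iter__udp *ctx)
 *	{
 *		struct seq_file *seq = ctx->meta->seq;
 *		struct udp_sock *udp_sk = ctx->udp_sk;
 *
 *		if (!udp_sk)
 *			return 0;
 *		// one line per UDP socket into the iterator's seq_file
 *		BPF_SEQ_PRINTF(seq, "bucket %d uid %u\n", ctx->bucket, ctx->uid);
 *		return 0;
 *	}
 */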
3340
3341 void __init udp_init(void)
3342 {
3343 unsigned long limit;
3344 unsigned int i;
3345
3346 udp_table_init(&udp_table, "UDP");
3347 limit = nr_free_buffer_pages() / 8;
3348 limit = max(limit, 128UL);
3349 sysctl_udp_mem[0] = limit / 4 * 3;
3350 sysctl_udp_mem[1] = limit;
3351 sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
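/* Worked example (illustrative numbers, not a measured system): with about
 * 4 GiB of lowmem, nr_free_buffer_pages() is on the order of 1M 4 KiB pages,
 * so limit ~= 128K pages.  That gives udp_mem[0] ~= 96K pages (min),
 * udp_mem[1] ~= 128K pages (pressure) and udp_mem[2] ~= 192K pages (max),
 * i.e. roughly 384 MiB / 512 MiB / 768 MiB of UDP buffer budget.
 */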
3352
3353 /* 16 spinlocks per cpu */
3354 udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
3355 udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log,
3356 GFP_KERNEL);
3357 if (!udp_busylocks)
3358 panic("UDP: failed to alloc udp_busylocks\n");
3359 for (i = 0; i < (1U << udp_busylocks_log); i++)
3360 spin_lock_init(udp_busylocks + i);
3361
3362 if (register_pernet_subsys(&udp_sysctl_ops))
3363 panic("UDP: failed to init sysctl parameters.\n");
3364
3365 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3366 bpf_iter_register();
3367 #endif
3368 }