Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /* Peer event handling, typically ICMP messages.
0003  *
0004  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
0005  * Written by David Howells (dhowells@redhat.com)
0006  */
0007 
0008 #include <linux/module.h>
0009 #include <linux/net.h>
0010 #include <linux/skbuff.h>
0011 #include <linux/errqueue.h>
0012 #include <linux/udp.h>
0013 #include <linux/in.h>
0014 #include <linux/in6.h>
0015 #include <linux/icmp.h>
0016 #include <net/sock.h>
0017 #include <net/af_rxrpc.h>
0018 #include <net/ip.h>
0019 #include <net/icmp.h>
0020 #include "ar-internal.h"
0021 
0022 static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int);
0023 static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
0024 static void rxrpc_distribute_error(struct rxrpc_peer *, int,
0025                    enum rxrpc_call_completion);
0026 
/*
 * Find the peer associated with an ICMPv4 packet.
 *
 * The skb is the ICMP message itself; the offending UDP/RxRPC packet that
 * triggered it is quoted in the payload, with its UDP header at udp_offset
 * bytes from skb->data.  The peer's transport address is reconstructed into
 * *srx from the quoted headers and looked up under RCU.
 *
 * *info is set to the next-hop MTU quoted in a Destination Unreachable
 * message; it is only meaningful when the ICMP code is ICMP_FRAG_NEEDED.
 *
 * Returns the peer record, or NULL if the message type is not of interest,
 * the address family is unexpected or no matching peer exists.  The caller
 * must hold the RCU read lock.
 */
static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
						     struct sk_buff *skb,
						     unsigned int udp_offset,
						     unsigned int *info,
						     struct sockaddr_rxrpc *srx)
{
	struct iphdr *ip, *ip0 = ip_hdr(skb);
	struct icmphdr *icmp = icmp_hdr(skb);
	struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);

	_enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code);

	switch (icmp->type) {
	case ICMP_DEST_UNREACH:
		/* NOTE(review): un.frag.mtu is only defined for code
		 * ICMP_FRAG_NEEDED; for other unreachable codes this stores
		 * an arbitrary header field, but the caller only consumes
		 * *info when code == ICMP_FRAG_NEEDED.
		 */
		*info = ntohs(icmp->un.frag.mtu);
		fallthrough;
	case ICMP_TIME_EXCEEDED:
	case ICMP_PARAMETERPROB:
		/* The quoted IP header follows the 8-byte ICMP header. */
		ip = (struct iphdr *)((void *)icmp + 8);
		break;
	default:
		return NULL;
	}

	/* Start from the local endpoint's address, then overwrite the parts
	 * that identify the remote peer.
	 */
	memset(srx, 0, sizeof(*srx));
	srx->transport_type = local->srx.transport_type;
	srx->transport_len = local->srx.transport_len;
	srx->transport.family = local->srx.transport.family;

	/* Can we see an ICMP4 packet on an ICMP6 listening socket?  and vice
	 * versa?
	 */
	switch (srx->transport.family) {
	case AF_INET:
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		/* The quoted packet is one we sent, so its destination
		 * port/address identify the peer.
		 */
		srx->transport.sin.sin_port = udp->dest;
		memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
		       sizeof(struct in_addr));
		break;

#ifdef CONFIG_AF_RXRPC_IPV6
	case AF_INET6:
		/* ICMPv4 error arriving on a v6 socket: the quoted packet is
		 * IPv4, so record the peer as an AF_INET transport address.
		 */
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = udp->dest;
		memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
		       sizeof(struct in_addr));
		break;
#endif

	default:
		WARN_ON_ONCE(1);
		return NULL;
	}

	_net("ICMP {%pISp}", &srx->transport);
	return rxrpc_lookup_peer_rcu(local, srx);
}
0089 
0090 #ifdef CONFIG_AF_RXRPC_IPV6
0091 /*
0092  * Find the peer associated with an ICMPv6 packet.
0093  */
0094 static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local,
0095                               struct sk_buff *skb,
0096                               unsigned int udp_offset,
0097                               unsigned int *info,
0098                               struct sockaddr_rxrpc *srx)
0099 {
0100     struct icmp6hdr *icmp = icmp6_hdr(skb);
0101     struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb);
0102     struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);
0103 
0104     _enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code);
0105 
0106     switch (icmp->icmp6_type) {
0107     case ICMPV6_DEST_UNREACH:
0108         *info = ntohl(icmp->icmp6_mtu);
0109         fallthrough;
0110     case ICMPV6_PKT_TOOBIG:
0111     case ICMPV6_TIME_EXCEED:
0112     case ICMPV6_PARAMPROB:
0113         ip = (struct ipv6hdr *)((void *)icmp + 8);
0114         break;
0115     default:
0116         return NULL;
0117     }
0118 
0119     memset(srx, 0, sizeof(*srx));
0120     srx->transport_type = local->srx.transport_type;
0121     srx->transport_len = local->srx.transport_len;
0122     srx->transport.family = local->srx.transport.family;
0123 
0124     /* Can we see an ICMP4 packet on an ICMP6 listening socket?  and vice
0125      * versa?
0126      */
0127     switch (srx->transport.family) {
0128     case AF_INET:
0129         _net("Rx ICMP6 on v4 sock");
0130         srx->transport_len = sizeof(srx->transport.sin);
0131         srx->transport.family = AF_INET;
0132         srx->transport.sin.sin_port = udp->dest;
0133         memcpy(&srx->transport.sin.sin_addr,
0134                &ip->daddr.s6_addr32[3], sizeof(struct in_addr));
0135         break;
0136     case AF_INET6:
0137         _net("Rx ICMP6");
0138         srx->transport.sin.sin_port = udp->dest;
0139         memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr,
0140                sizeof(struct in6_addr));
0141         break;
0142     default:
0143         WARN_ON_ONCE(1);
0144         return NULL;
0145     }
0146 
0147     _net("ICMP {%pISp}", &srx->transport);
0148     return rxrpc_lookup_peer_rcu(local, srx);
0149 }
0150 #endif /* CONFIG_AF_RXRPC_IPV6 */
0151 
/*
 * Handle an error received on the local endpoint as a tunnel.
 *
 * Called for ICMP/ICMPv6 errors delivered to the UDP encapsulation socket.
 * The affected peer is looked up from the packet quoted in the ICMP
 * payload; an MTU notification is applied to the peer directly, while any
 * other error is translated into an errno and distributed to all calls
 * routed through that peer as a network error.
 *
 * NOTE(review): type/code are read via icmp_hdr() even for IPv6 packets;
 * this presumably works because struct icmp6hdr also has type/code as its
 * first two bytes — confirm.
 */
void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb,
			 unsigned int udp_offset)
{
	struct sock_extended_err ee;
	struct sockaddr_rxrpc srx;
	struct rxrpc_local *local;
	struct rxrpc_peer *peer;
	unsigned int info = 0;
	int err;
	u8 version = ip_hdr(skb)->version;
	u8 type = icmp_hdr(skb)->type;
	u8 code = icmp_hdr(skb)->code;

	/* The peer lookup runs under the RCU read lock; the reference taken
	 * below keeps the peer alive once the lock is dropped.
	 */
	rcu_read_lock();
	local = rcu_dereference_sk_user_data(sk);
	if (unlikely(!local)) {
		rcu_read_unlock();
		return;
	}

	rxrpc_new_skb(skb, rxrpc_skb_received);

	switch (ip_hdr(skb)->version) {
	case IPVERSION:
		peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset,
						  &info, &srx);
		break;
#ifdef CONFIG_AF_RXRPC_IPV6
	case 6:
		peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset,
						   &info, &srx);
		break;
#endif
	default:
		rcu_read_unlock();
		return;
	}

	/* Only keep the peer if it isn't already being torn down. */
	if (peer && !rxrpc_get_peer_maybe(peer))
		peer = NULL;
	if (!peer) {
		rcu_read_unlock();
		return;
	}

	memset(&ee, 0, sizeof(ee));

	switch (version) {
	case IPVERSION:
		switch (type) {
		case ICMP_DEST_UNREACH:
			switch (code) {
			case ICMP_FRAG_NEEDED:
				/* Fragmentation Needed adjusts the path MTU
				 * instead of failing the calls.
				 */
				rxrpc_adjust_mtu(peer, info);
				rcu_read_unlock();
				rxrpc_put_peer(peer);
				return;
			default:
				break;
			}

			err = EHOSTUNREACH;
			if (code <= NR_ICMP_UNREACH) {
				/* Might want to do something different with
				 * non-fatal errors
				 */
				//harderr = icmp_err_convert[code].fatal;
				err = icmp_err_convert[code].errno;
			}
			break;

		case ICMP_TIME_EXCEEDED:
			err = EHOSTUNREACH;
			break;
		default:
			err = EPROTO;
			break;
		}

		ee.ee_origin = SO_EE_ORIGIN_ICMP;
		ee.ee_type = type;
		ee.ee_code = code;
		ee.ee_errno = err;
		break;

#ifdef CONFIG_AF_RXRPC_IPV6
	case 6:
		switch (type) {
		case ICMPV6_PKT_TOOBIG:
			/* Packet Too Big adjusts the path MTU instead of
			 * failing the calls.
			 */
			rxrpc_adjust_mtu(peer, info);
			rcu_read_unlock();
			rxrpc_put_peer(peer);
			return;
		}

		icmpv6_err_convert(type, code, &err);

		/* Soften EACCES (presumably admin-prohibited — TODO confirm)
		 * to plain unreachability.
		 */
		if (err == EACCES)
			err = EHOSTUNREACH;

		ee.ee_origin = SO_EE_ORIGIN_ICMP6;
		ee.ee_type = type;
		ee.ee_code = code;
		ee.ee_errno = err;
		break;
#endif
	}

	trace_rxrpc_rx_icmp(peer, &ee, &srx);

	rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR);
	rcu_read_unlock();
	rxrpc_put_peer(peer);
}
0269 
/*
 * Find the peer associated with a local error.
 *
 * The error skb was dequeued from the UDP socket's error queue; serr
 * describes its origin and, for ICMP-sourced errors, the offset into the
 * network header (serr->addr_offset) at which the offending address lies.
 *
 * The peer's transport address is reconstructed into *srx and looked up
 * under RCU.  Returns the peer record or NULL.  The caller must hold the
 * RCU read lock.
 */
static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
						      const struct sk_buff *skb,
						      struct sockaddr_rxrpc *srx)
{
	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);

	_enter("");

	/* Start from the local endpoint's address, then overwrite the parts
	 * that identify the remote peer.
	 */
	memset(srx, 0, sizeof(*srx));
	srx->transport_type = local->srx.transport_type;
	srx->transport_len = local->srx.transport_len;
	srx->transport.family = local->srx.transport.family;

	switch (srx->transport.family) {
	case AF_INET:
		srx->transport_len = sizeof(srx->transport.sin);
		srx->transport.family = AF_INET;
		srx->transport.sin.sin_port = serr->port;
		switch (serr->ee.ee_origin) {
		case SO_EE_ORIGIN_ICMP:
			_net("Rx ICMP");
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in_addr));
			break;
		case SO_EE_ORIGIN_ICMP6:
			_net("Rx ICMP6 on v4 sock");
			/* Take the low 32 bits (offset +12) of the quoted
			 * IPv6 address — presumably a v4-mapped address
			 * (TODO confirm).
			 */
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset + 12,
			       sizeof(struct in_addr));
			break;
		default:
			/* Locally-generated error: fall back to the packet's
			 * source address.
			 */
			memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
			       sizeof(struct in_addr));
			break;
		}
		break;

#ifdef CONFIG_AF_RXRPC_IPV6
	case AF_INET6:
		switch (serr->ee.ee_origin) {
		case SO_EE_ORIGIN_ICMP6:
			_net("Rx ICMP6");
			srx->transport.sin6.sin6_port = serr->port;
			memcpy(&srx->transport.sin6.sin6_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in6_addr));
			break;
		case SO_EE_ORIGIN_ICMP:
			_net("Rx ICMP on v6 sock");
			/* ICMPv4 error on a v6 socket: record the peer as an
			 * AF_INET transport address.
			 */
			srx->transport_len = sizeof(srx->transport.sin);
			srx->transport.family = AF_INET;
			srx->transport.sin.sin_port = serr->port;
			memcpy(&srx->transport.sin.sin_addr,
			       skb_network_header(skb) + serr->addr_offset,
			       sizeof(struct in_addr));
			break;
		default:
			/* Locally-generated error: fall back to the packet's
			 * source address.
			 */
			memcpy(&srx->transport.sin6.sin6_addr,
			       &ipv6_hdr(skb)->saddr,
			       sizeof(struct in6_addr));
			break;
		}
		break;
#endif

	default:
		BUG();
	}

	return rxrpc_lookup_peer_rcu(local, srx);
}
0345 
0346 /*
0347  * Handle an MTU/fragmentation problem.
0348  */
0349 static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
0350 {
0351     _net("Rx ICMP Fragmentation Needed (%d)", mtu);
0352 
0353     /* wind down the local interface MTU */
0354     if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
0355         peer->if_mtu = mtu;
0356         _net("I/F MTU %u", mtu);
0357     }
0358 
0359     if (mtu == 0) {
0360         /* they didn't give us a size, estimate one */
0361         mtu = peer->if_mtu;
0362         if (mtu > 1500) {
0363             mtu >>= 1;
0364             if (mtu < 1500)
0365                 mtu = 1500;
0366         } else {
0367             mtu -= 100;
0368             if (mtu < peer->hdrsize)
0369                 mtu = peer->hdrsize + 4;
0370         }
0371     }
0372 
0373     if (mtu < peer->mtu) {
0374         spin_lock_bh(&peer->lock);
0375         peer->mtu = mtu;
0376         peer->maxdata = peer->mtu - peer->hdrsize;
0377         spin_unlock_bh(&peer->lock);
0378         _net("Net MTU %u (maxdata %u)",
0379              peer->mtu, peer->maxdata);
0380     }
0381 }
0382 
/*
 * Handle an error received on the local endpoint.
 *
 * Invoked as the UDP socket's error-report callback.  Dequeues one skb
 * from the socket error queue and, for locally-originated errors only,
 * stores the error on the matching peer record (ICMP-sourced errors reach
 * rxrpc_encap_err_rcv() instead).
 */
void rxrpc_error_report(struct sock *sk)
{
	struct sock_exterr_skb *serr;
	struct sockaddr_rxrpc srx;
	struct rxrpc_local *local;
	struct rxrpc_peer *peer = NULL;
	struct sk_buff *skb;

	rcu_read_lock();
	local = rcu_dereference_sk_user_data(sk);
	if (unlikely(!local)) {
		rcu_read_unlock();
		return;
	}
	_enter("%p{%d}", sk, local->debug_id);

	/* Clear the outstanding error value on the socket so that it doesn't
	 * cause kernel_sendmsg() to return it later.
	 */
	sock_error(sk);

	skb = sock_dequeue_err_skb(sk);
	if (!skb) {
		rcu_read_unlock();
		_leave("UDP socket errqueue empty");
		return;
	}
	rxrpc_new_skb(skb, rxrpc_skb_received);
	serr = SKB_EXT_ERR(skb);

	if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) {
		peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
		/* Only keep the peer if it isn't already being torn down. */
		if (peer && !rxrpc_get_peer_maybe(peer))
			peer = NULL;
		if (peer) {
			trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
			rxrpc_store_error(peer, serr);
		}
	}

	rcu_read_unlock();
	rxrpc_free_skb(skb, rxrpc_skb_freed);
	rxrpc_put_peer(peer);	/* peer may still be NULL here */
	_leave("");
}
0431 
/*
 * Map an error report to error codes on the peer record.
 *
 * Decides whether the report represents a network or a local error (which
 * selects the call completion type) and distributes the errno to every
 * call currently using this peer.  The nested switches below exist only to
 * emit a distinct debug-trace line per ICMP type/code.
 */
static void rxrpc_store_error(struct rxrpc_peer *peer,
			      struct sock_exterr_skb *serr)
{
	enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR;
	struct sock_extended_err *ee;
	int err;

	_enter("");

	ee = &serr->ee;

	err = ee->ee_errno;

	switch (ee->ee_origin) {
	case SO_EE_ORIGIN_ICMP:
		switch (ee->ee_type) {
		case ICMP_DEST_UNREACH:
			switch (ee->ee_code) {
			case ICMP_NET_UNREACH:
				_net("Rx Received ICMP Network Unreachable");
				break;
			case ICMP_HOST_UNREACH:
				_net("Rx Received ICMP Host Unreachable");
				break;
			case ICMP_PORT_UNREACH:
				_net("Rx Received ICMP Port Unreachable");
				break;
			case ICMP_NET_UNKNOWN:
				_net("Rx Received ICMP Unknown Network");
				break;
			case ICMP_HOST_UNKNOWN:
				_net("Rx Received ICMP Unknown Host");
				break;
			default:
				_net("Rx Received ICMP DestUnreach code=%u",
				     ee->ee_code);
				break;
			}
			break;

		case ICMP_TIME_EXCEEDED:
			_net("Rx Received ICMP TTL Exceeded");
			break;

		default:
			_proto("Rx Received ICMP error { type=%u code=%u }",
			       ee->ee_type, ee->ee_code);
			break;
		}
		break;

	case SO_EE_ORIGIN_NONE:
	case SO_EE_ORIGIN_LOCAL:
		/* Locally-generated errors complete calls as local errors
		 * rather than network errors.
		 */
		_proto("Rx Received local error { error=%d }", err);
		compl = RXRPC_CALL_LOCAL_ERROR;
		break;

	case SO_EE_ORIGIN_ICMP6:
		/* Soften EACCES (presumably admin-prohibited — TODO confirm)
		 * to plain unreachability, then fall through to the default
		 * trace line.
		 */
		if (err == EACCES)
			err = EHOSTUNREACH;
		fallthrough;
	default:
		_proto("Rx Received error report { orig=%u }", ee->ee_origin);
		break;
	}

	rxrpc_distribute_error(peer, err, compl);
}
0503 
0504 /*
0505  * Distribute an error that occurred on a peer.
0506  */
0507 static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error,
0508                    enum rxrpc_call_completion compl)
0509 {
0510     struct rxrpc_call *call;
0511 
0512     hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) {
0513         rxrpc_see_call(call);
0514         rxrpc_set_call_completion(call, compl, 0, -error);
0515     }
0516 }
0517 
/*
 * Perform keep-alive pings.
 *
 * Drains the collector list assembled by rxrpc_peer_keepalive_worker().
 * For each peer: send a keepalive if one is due (or its due time falls
 * outside the bucket-ring window), then re-bucket the peer according to
 * when its next keepalive should go out.
 *
 * @base:   the time, in seconds, that bucket @cursor corresponds to
 * @cursor: index of the keepalive bucket currently being processed
 */
static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
					  struct list_head *collector,
					  time64_t base,
					  u8 cursor)
{
	struct rxrpc_peer *peer;
	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
	time64_t keepalive_at;
	int slot;

	spin_lock_bh(&rxnet->peer_hash_lock);

	while (!list_empty(collector)) {
		peer = list_entry(collector->next,
				  struct rxrpc_peer, keepalive_link);

		list_del_init(&peer->keepalive_link);
		/* Skip peers that are already being torn down. */
		if (!rxrpc_get_peer_maybe(peer))
			continue;

		if (__rxrpc_use_local(peer->local)) {
			/* Drop the hash lock while transmitting; the peer
			 * reference taken above keeps the record stable.
			 */
			spin_unlock_bh(&rxnet->peer_hash_lock);

			keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
			slot = keepalive_at - base;
			_debug("%02x peer %u t=%d {%pISp}",
			       cursor, peer->debug_id, slot, &peer->srx.transport);

			/* Ping now if due, or if the due time lies outside
			 * the window the bucket ring can represent.
			 */
			if (keepalive_at <= base ||
			    keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
				rxrpc_send_keepalive(peer);
				slot = RXRPC_KEEPALIVE_TIME;
			}

			/* A transmission to this peer occurred since last we
			 * examined it so put it into the appropriate future
			 * bucket.
			 */
			slot += cursor;
			slot &= mask;
			spin_lock_bh(&rxnet->peer_hash_lock);
			list_add_tail(&peer->keepalive_link,
				      &rxnet->peer_keepalive[slot & mask]);
			rxrpc_unuse_local(peer->local);
		}
		/* Drops the ref taken above; _locked variant as the hash
		 * lock is held again here.
		 */
		rxrpc_put_peer_locked(peer);
	}

	spin_unlock_bh(&rxnet->peer_hash_lock);
}
0571 
/*
 * Perform keep-alive pings with VERSION packets to keep any NAT alive.
 *
 * Peers are spread over a ring of time buckets (rxnet->peer_keepalive[]),
 * one bucket per second; @base is the time the bucket at @cursor
 * corresponds to.  All buckets that have fallen due since the last run,
 * plus any newly added peers, are spliced onto a collector list and handed
 * to rxrpc_peer_keepalive_dispatch(), after which the timer is re-armed
 * for the next occupied slot.
 */
void rxrpc_peer_keepalive_worker(struct work_struct *work)
{
	struct rxrpc_net *rxnet =
		container_of(work, struct rxrpc_net, peer_keepalive_work);
	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
	time64_t base, now, delay;
	u8 cursor, stop;
	LIST_HEAD(collector);

	now = ktime_get_seconds();
	base = rxnet->peer_keepalive_base;
	cursor = rxnet->peer_keepalive_cursor;
	_enter("%lld,%u", base - now, cursor);

	if (!rxnet->live)
		return;

	/* Remove to a temporary list all the peers that are currently lodged
	 * in expired buckets plus all new peers.
	 *
	 * Everything in the bucket at the cursor is processed this
	 * second; the bucket at cursor + 1 goes at now + 1s and so
	 * on...
	 */
	spin_lock_bh(&rxnet->peer_hash_lock);
	list_splice_init(&rxnet->peer_keepalive_new, &collector);

	/* Sweep at most one full revolution of the ring; the (s8) cast makes
	 * the loop comparison safe under u8 wrap-around.
	 */
	stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
	while (base <= now && (s8)(cursor - stop) < 0) {
		list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask],
				      &collector);
		base++;
		cursor++;
	}

	base = now;
	spin_unlock_bh(&rxnet->peer_hash_lock);

	rxnet->peer_keepalive_base = base;
	rxnet->peer_keepalive_cursor = cursor;
	rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor);
	ASSERT(list_empty(&collector));

	/* Schedule the timer for the next occupied timeslot. */
	cursor = rxnet->peer_keepalive_cursor;
	stop = cursor + RXRPC_KEEPALIVE_TIME - 1;
	for (; (s8)(cursor - stop) < 0; cursor++) {
		if (!list_empty(&rxnet->peer_keepalive[cursor & mask]))
			break;
		base++;
	}

	now = ktime_get_seconds();
	delay = base - now;
	if (delay < 1)
		delay = 1;
	delay *= HZ;
	/* timer_reduce() only shortens an already-pending expiry. */
	if (rxnet->live)
		timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);

	_leave("");
}