// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:    Implementation of BSD Unix domain sockets.
 *
 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *      Linus Torvalds  :   Assorted bug cures.
 *      Niibe Yutaka    :   async I/O support.
 *      Carsten Paeth   :   PF_UNIX check, address fixes.
 *      Alan Cox    :   Limit size of allocated blocks.
 *      Alan Cox    :   Fixed the stupid socketpair bug.
 *      Alan Cox    :   BSD compatibility fine tuning.
 *      Alan Cox    :   Fixed a bug in connect when interrupted.
 *      Alan Cox    :   Sorted out a proper draft version of
 *                  file descriptor passing hacked up from
 *                  Mike Shaver's work.
 *      Marty Leisner   :   Fixes to fd passing
 *      Nick Nevin  :   recvmsg bugfix.
 *      Alan Cox    :   Started proper garbage collector
 *      Heiko Eißfeldt  :   Missing verify_area check
 *      Alan Cox    :   Started POSIXisms
 *      Andreas Schwab  :   Replace inode by dentry for proper
 *                  reference counting
 *      Kirk Petersen   :   Made this a module
 *      Christoph Rohland   :   Elegant non-blocking accept/connect algorithm.
 *                  Lots of bug fixes.
 *       Alexey Kuznetsov   :   Repaired (I hope) bugs introduced
 *                  by the above two patches.
 *       Andrea Arcangeli   :   If possible we block in connect(2)
 *                  if the max backlog of the listen socket
 *                  has been reached. This won't break
 *                  old apps and it avoids hashing a huge
 *                  number of socks (for unix_gc()
 *                  performance reasons).
 *                  Security fix that limits the max
 *                  number of socks to 2*max_files and
 *                  the number of skbs queueable in the
 *                  dgram receiver.
 *      Artur Skawina   :   Hash function optimizations
 *       Alexey Kuznetsov   :   Full scale SMP. Lots of bugs are introduced 8)
 *        Malcolm Beattie   :   Set peercred for socketpair
 *       Michal Ostrowski   :       Module initialization cleanup.
 *       Arnaldo C. Melo    :   Remove MOD_{INC,DEC}_USE_COUNT,
 *                      the core infrastructure is doing that
 *                      for all net proto families now (2.5.69+)
 *
 * Known differences from the reference BSD that was tested:
 *
 *  [TO FIX]
 *  ECONNREFUSED is not returned from one end of a connected() socket to the
 *      other the moment one end closes.
 *  fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *      and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *  [NOT TO FIX]
 *  accept() returns a path name even if the connecting socket has closed
 *      in the meantime (BSD loses the path and gives up).
 *  accept() returns 0 length path for an unbound connector. BSD returns 16
 *      and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *  socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *  BSD af_unix apparently has connect forgetting to block properly.
 *      (need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *  Bug fixes and improvements.
 *      - client shutdown killed server socket.
 *      - removed all useless cli/sti pairs.
 *
 *  Semantic changes/extensions.
 *      - generic control message passing.
 *      - SCM_CREDENTIALS control message.
 *      - "Abstract" (not FS based) socket bindings.
 *        Abstract names are sequences of bytes (not zero terminated)
 *        starting with 0, so that this name space does not intersect
 *        with BSD names.
 */
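
/* Illustrative userspace sketch (not part of this file): binding an
 * abstract address as described above.  The name starts with a NUL byte
 * and is *not* NUL-terminated, so addrlen must count exactly the bytes
 * used.  Error handling is elided for brevity.
 *
 *  struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *  int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *  sun.sun_path[0] = '\0';                // abstract namespace marker
 *  memcpy(sun.sun_path + 1, "demo", 4);   // name is "\0demo", no trailing NUL
 *  bind(fd, (struct sockaddr *)&sun,
 *       offsetof(struct sockaddr_un, sun_path) + 1 + 4);
 */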

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"

static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 *    the hash table is protected with a spinlock.
 *    each socket state is protected by a separate spinlock.
 */

static unsigned int unix_unbound_hash(struct sock *sk)
{
    unsigned long hash = (unsigned long)sk;

    hash ^= hash >> 16;
    hash ^= hash >> 8;
    hash ^= sk->sk_type;

    return hash & UNIX_HASH_MOD;
}

static unsigned int unix_bsd_hash(struct inode *i)
{
    return i->i_ino & UNIX_HASH_MOD;
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
                       int addr_len, int type)
{
    __wsum csum = csum_partial(sunaddr, addr_len, 0);
    unsigned int hash;

    hash = (__force unsigned int)csum_fold(csum);
    hash ^= hash >> 8;
    hash ^= type;

    return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}
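
/* How the three hash functions above partition the hash space (derived
 * from their return values; the concrete constants live in
 * <net/af_unix.h>):
 *
 *   unbound sockets:   [0, UNIX_HASH_MOD]               keyed on sk + type
 *   BSD (FS) sockets:  [0, UNIX_HASH_MOD]               keyed on inode number
 *   abstract sockets:  [UNIX_HASH_MOD + 1, 2 * UNIX_HASH_MOD + 1]
 *                                                       keyed on name csum + type
 *
 * BSD sockets are additionally chained into bsd_socket_buckets so that
 * unix_find_socket_byinode() can look them up by inode alone.
 */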

static void unix_table_double_lock(struct net *net,
                   unsigned int hash1, unsigned int hash2)
{
    if (hash1 == hash2) {
        spin_lock(&net->unx.table.locks[hash1]);
        return;
    }

    if (hash1 > hash2)
        swap(hash1, hash2);

    spin_lock(&net->unx.table.locks[hash1]);
    spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}

static void unix_table_double_unlock(struct net *net,
                     unsigned int hash1, unsigned int hash2)
{
    if (hash1 == hash2) {
        spin_unlock(&net->unx.table.locks[hash1]);
        return;
    }

    spin_unlock(&net->unx.table.locks[hash1]);
    spin_unlock(&net->unx.table.locks[hash2]);
}

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
    UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
    scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
    return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
    return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
    return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
    return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
    return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
    return skb_queue_len_lockless(&sk->sk_receive_queue) >
        READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
    struct sock *peer;

    unix_state_lock(s);
    peer = unix_peer(s);
    if (peer)
        sock_hold(peer);
    unix_state_unlock(s);
    return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
                         int addr_len)
{
    struct unix_address *addr;

    addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
    if (!addr)
        return NULL;

    refcount_set(&addr->refcnt, 1);
    addr->len = addr_len;
    memcpy(addr->name, sunaddr, addr_len);

    return addr;
}

static inline void unix_release_addr(struct unix_address *addr)
{
    if (refcount_dec_and_test(&addr->refcnt))
        kfree(addr);
}

/*
 *  Check unix socket name:
 *      - it should not be zero length.
 *      - if it does not start with a zero byte, it should be
 *        NUL-terminated (FS object)
 *      - if it starts with a zero byte, it is an abstract name.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
    if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
        addr_len > sizeof(*sunaddr))
        return -EINVAL;

    if (sunaddr->sun_family != AF_UNIX)
        return -EINVAL;

    return 0;
}
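
/* Worked example for unix_validate_addr(): offsetof(struct sockaddr_un,
 * sun_path) is 2 (just the sa_family_t) and sizeof(struct sockaddr_un)
 * is 110, so addr_len must satisfy 2 < addr_len <= 110.  An addr_len of
 * exactly 2 never reaches this check on bind(): unix_bind() below treats
 * it as a request to autobind instead.
 */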

static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
    /* This may look like an off-by-one error but it is a bit more
     * subtle.  108 is the longest valid AF_UNIX path for a binding.
     * sun_path[108] doesn't as such exist.  However in kernel space
     * we are guaranteed that it is a valid memory location in our
     * kernel address buffer because syscall functions always pass
     * a pointer to a struct sockaddr_storage, which has a bigger
     * buffer than 108.
     */
    ((char *)sunaddr)[addr_len] = 0;
}

static void __unix_remove_socket(struct sock *sk)
{
    sk_del_node_init(sk);
}

static void __unix_insert_socket(struct net *net, struct sock *sk)
{
    DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
    sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
}

static void __unix_set_addr_hash(struct net *net, struct sock *sk,
                 struct unix_address *addr, unsigned int hash)
{
    __unix_remove_socket(sk);
    smp_store_release(&unix_sk(sk)->addr, addr);

    sk->sk_hash = hash;
    __unix_insert_socket(net, sk);
}

static void unix_remove_socket(struct net *net, struct sock *sk)
{
    spin_lock(&net->unx.table.locks[sk->sk_hash]);
    __unix_remove_socket(sk);
    spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
{
    spin_lock(&net->unx.table.locks[sk->sk_hash]);
    __unix_insert_socket(net, sk);
    spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_bsd_socket(struct sock *sk)
{
    spin_lock(&bsd_socket_locks[sk->sk_hash]);
    sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
    spin_unlock(&bsd_socket_locks[sk->sk_hash]);
}

static void unix_remove_bsd_socket(struct sock *sk)
{
    if (!hlist_unhashed(&sk->sk_bind_node)) {
        spin_lock(&bsd_socket_locks[sk->sk_hash]);
        __sk_del_bind_node(sk);
        spin_unlock(&bsd_socket_locks[sk->sk_hash]);

        sk_node_init(&sk->sk_bind_node);
    }
}

static struct sock *__unix_find_socket_byname(struct net *net,
                          struct sockaddr_un *sunname,
                          int len, unsigned int hash)
{
    struct sock *s;

    sk_for_each(s, &net->unx.table.buckets[hash]) {
        struct unix_sock *u = unix_sk(s);

        if (u->addr->len == len &&
            !memcmp(u->addr->name, sunname, len))
            return s;
    }
    return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
                           struct sockaddr_un *sunname,
                           int len, unsigned int hash)
{
    struct sock *s;

    spin_lock(&net->unx.table.locks[hash]);
    s = __unix_find_socket_byname(net, sunname, len, hash);
    if (s)
        sock_hold(s);
    spin_unlock(&net->unx.table.locks[hash]);
    return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
    unsigned int hash = unix_bsd_hash(i);
    struct sock *s;

    spin_lock(&bsd_socket_locks[hash]);
    sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
        struct dentry *dentry = unix_sk(s)->path.dentry;

        if (dentry && d_backing_inode(dentry) == i) {
            sock_hold(s);
            spin_unlock(&bsd_socket_locks[hash]);
            return s;
        }
    }
    spin_unlock(&bsd_socket_locks[hash]);
    return NULL;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (e.g., /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */

static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
                      void *key)
{
    struct unix_sock *u;
    wait_queue_head_t *u_sleep;

    u = container_of(q, struct unix_sock, peer_wake);

    __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
                q);
    u->peer_wake.private = NULL;

    /* relaying can only happen while the wq still exists */
    u_sleep = sk_sleep(&u->sk);
    if (u_sleep)
        wake_up_interruptible_poll(u_sleep, key_to_poll(key));

    return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
    struct unix_sock *u, *u_other;
    int rc;

    u = unix_sk(sk);
    u_other = unix_sk(other);
    rc = 0;
    spin_lock(&u_other->peer_wait.lock);

    if (!u->peer_wake.private) {
        u->peer_wake.private = other;
        __add_wait_queue(&u_other->peer_wait, &u->peer_wake);

        rc = 1;
    }

    spin_unlock(&u_other->peer_wait.lock);
    return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
                        struct sock *other)
{
    struct unix_sock *u, *u_other;

    u = unix_sk(sk);
    u_other = unix_sk(other);
    spin_lock(&u_other->peer_wait.lock);

    if (u->peer_wake.private == other) {
        __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
        u->peer_wake.private = NULL;
    }

    spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
                           struct sock *other)
{
    unix_dgram_peer_wake_disconnect(sk, other);
    wake_up_interruptible_poll(sk_sleep(sk),
                   EPOLLOUT |
                   EPOLLWRNORM |
                   EPOLLWRBAND);
}

/* preconditions:
 *  - unix_peer(sk) == other
 *  - association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
    int connected;

    connected = unix_dgram_peer_wake_connect(sk, other);

    /* If other is SOCK_DEAD, we want to make sure we signal
     * POLLOUT, such that a subsequent write() can get a
     * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
     * to other and its queue is full, we will hang waiting for
     * POLLOUT.
     */
    if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
        return 1;

    if (connected)
        unix_dgram_peer_wake_disconnect(sk, other);

    return 0;
}

static int unix_writable(const struct sock *sk)
{
    return sk->sk_state != TCP_LISTEN &&
           (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
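
/* Worked example for unix_writable(): the left shift by 2 means the
 * socket counts as writable while wmem_alloc <= sk_sndbuf / 4.  E.g.,
 * with sk_sndbuf at 64 KiB, the socket stays writable until 16 KiB of
 * send buffer is in flight.
 */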

static void unix_write_space(struct sock *sk)
{
    struct socket_wq *wq;

    rcu_read_lock();
    if (unix_writable(sk)) {
        wq = rcu_dereference(sk->sk_wq);
        if (skwq_has_sleeper(wq))
            wake_up_interruptible_sync_poll(&wq->wait,
                EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
        sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
    }
    rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its
 * receive queue of packets that arrived from the previous peer. First,
 * this allows flow control based only on wmem_alloc; second, an sk
 * connected to a peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
    if (!skb_queue_empty(&sk->sk_receive_queue)) {
        skb_queue_purge(&sk->sk_receive_queue);
        wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

        /* If one link of a bidirectional dgram pipe is disconnected,
         * we signal the error. Messages are lost. Do not do this
         * when the peer was not connected to us.
         */
        if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
            other->sk_err = ECONNRESET;
            sk_error_report(other);
        }
    }
    other->sk_state = TCP_CLOSE;
}

static void unix_sock_destructor(struct sock *sk)
{
    struct unix_sock *u = unix_sk(sk);

    skb_queue_purge(&sk->sk_receive_queue);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
    if (u->oob_skb) {
        kfree_skb(u->oob_skb);
        u->oob_skb = NULL;
    }
#endif
    DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
    DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
    DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
    if (!sock_flag(sk, SOCK_DEAD)) {
        pr_info("Attempt to release alive unix socket: %p\n", sk);
        return;
    }

    if (u->addr)
        unix_release_addr(u->addr);

    atomic_long_dec(&unix_nr_socks);
    sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
    pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
        atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
    struct unix_sock *u = unix_sk(sk);
    struct sock *skpair;
    struct sk_buff *skb;
    struct path path;
    int state;

    unix_remove_socket(sock_net(sk), sk);
    unix_remove_bsd_socket(sk);

    /* Clear state */
    unix_state_lock(sk);
    sock_orphan(sk);
    sk->sk_shutdown = SHUTDOWN_MASK;
    path         = u->path;
    u->path.dentry = NULL;
    u->path.mnt = NULL;
    state = sk->sk_state;
    sk->sk_state = TCP_CLOSE;

    skpair = unix_peer(sk);
    unix_peer(sk) = NULL;

    unix_state_unlock(sk);

    wake_up_interruptible_all(&u->peer_wait);

    if (skpair != NULL) {
        if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
            unix_state_lock(skpair);
            /* No more writes */
            skpair->sk_shutdown = SHUTDOWN_MASK;
            if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                skpair->sk_err = ECONNRESET;
            unix_state_unlock(skpair);
            skpair->sk_state_change(skpair);
            sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
        }

        unix_dgram_peer_wake_disconnect(sk, skpair);
        sock_put(skpair); /* It may now die */
    }

    /* Try to flush out this socket. Throw out buffers at least */

    while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
        if (state == TCP_LISTEN)
            unix_release_sock(skb->sk, 1);
        /* passed fds are erased in the kfree_skb hook        */
        UNIXCB(skb).consumed = skb->len;
        kfree_skb(skb);
    }

    if (path.dentry)
        path_put(&path);

    sock_put(sk);

    /* ---- Socket is dead now and most probably destroyed ---- */

    /*
     * Fixme: BSD difference: In BSD all sockets connected to us get
     *    ECONNRESET and we die on the spot. In Linux we behave
     *    like files and pipes do and wait for the last
     *    dereference.
     *
     * Can't we simply set sock->err?
     *
     *    What does the above comment talk about? --ANK(980817)
     */

    if (unix_tot_inflight)
        unix_gc();      /* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
    const struct cred *old_cred;
    struct pid *old_pid;

    spin_lock(&sk->sk_peer_lock);
    old_pid = sk->sk_peer_pid;
    old_cred = sk->sk_peer_cred;
    sk->sk_peer_pid  = get_pid(task_tgid(current));
    sk->sk_peer_cred = get_current_cred();
    spin_unlock(&sk->sk_peer_lock);

    put_pid(old_pid);
    put_cred(old_cred);
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
    const struct cred *old_cred;
    struct pid *old_pid;

    if (sk < peersk) {
        spin_lock(&sk->sk_peer_lock);
        spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
    } else {
        spin_lock(&peersk->sk_peer_lock);
        spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
    }
    old_pid = sk->sk_peer_pid;
    old_cred = sk->sk_peer_cred;
    sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
    sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);

    spin_unlock(&sk->sk_peer_lock);
    spin_unlock(&peersk->sk_peer_lock);

    put_pid(old_pid);
    put_cred(old_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
    int err;
    struct sock *sk = sock->sk;
    struct unix_sock *u = unix_sk(sk);

    err = -EOPNOTSUPP;
    if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
        goto out;   /* Only stream/seqpacket sockets accept */
    err = -EINVAL;
    if (!u->addr)
        goto out;   /* No listens on an unbound socket */
    unix_state_lock(sk);
    if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
        goto out_unlock;
    if (backlog > sk->sk_max_ack_backlog)
        wake_up_interruptible_all(&u->peer_wait);
    sk->sk_max_ack_backlog  = backlog;
    sk->sk_state        = TCP_LISTEN;
    /* set credentials so connect can copy them */
    init_peercred(sk);
    err = 0;

out_unlock:
    unix_state_unlock(sk);
out:
    return err;
}
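
/* Minimal userspace sketch of the listen path handled above (assumes
 * "/tmp/demo.sock" does not already exist; error checks elided):
 *
 *  struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *  int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *  strcpy(sun.sun_path, "/tmp/demo.sock");
 *  bind(fd, (struct sockaddr *)&sun, sizeof(sun));
 *  listen(fd, 16);                  // sets sk_max_ack_backlog to 16
 *  int cfd = accept(fd, NULL, NULL);
 *
 * Calling listen() on an unbound socket fails with EINVAL, and on a
 * SOCK_DGRAM socket with EOPNOTSUPP, matching the checks above.
 */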

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                   int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
                       struct pipe_inode_info *, size_t size,
                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                  int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
    struct unix_sock *u = unix_sk(sk);

    if (mutex_lock_interruptible(&u->iolock))
        return -EINTR;

    sk->sk_peek_off = val;
    mutex_unlock(&u->iolock);

    return 0;
}

#ifdef CONFIG_PROC_FS
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
    struct sock *sk = sock->sk;
    struct unix_sock *u;

    if (sk) {
        u = unix_sk(sock->sk);
        seq_printf(m, "scm_fds: %u\n",
               atomic_read(&u->scm_stat.nr_fds));
    }
}
#else
#define unix_show_fdinfo NULL
#endif

static const struct proto_ops unix_stream_ops = {
    .family =   PF_UNIX,
    .owner =    THIS_MODULE,
    .release =  unix_release,
    .bind =     unix_bind,
    .connect =  unix_stream_connect,
    .socketpair =   unix_socketpair,
    .accept =   unix_accept,
    .getname =  unix_getname,
    .poll =     unix_poll,
    .ioctl =    unix_ioctl,
#ifdef CONFIG_COMPAT
    .compat_ioctl = unix_compat_ioctl,
#endif
    .listen =   unix_listen,
    .shutdown = unix_shutdown,
    .sendmsg =  unix_stream_sendmsg,
    .recvmsg =  unix_stream_recvmsg,
    .read_skb = unix_stream_read_skb,
    .mmap =     sock_no_mmap,
    .sendpage = unix_stream_sendpage,
    .splice_read =  unix_stream_splice_read,
    .set_peek_off = unix_set_peek_off,
    .show_fdinfo =  unix_show_fdinfo,
};

static const struct proto_ops unix_dgram_ops = {
    .family =   PF_UNIX,
    .owner =    THIS_MODULE,
    .release =  unix_release,
    .bind =     unix_bind,
    .connect =  unix_dgram_connect,
    .socketpair =   unix_socketpair,
    .accept =   sock_no_accept,
    .getname =  unix_getname,
    .poll =     unix_dgram_poll,
    .ioctl =    unix_ioctl,
#ifdef CONFIG_COMPAT
    .compat_ioctl = unix_compat_ioctl,
#endif
    .listen =   sock_no_listen,
    .shutdown = unix_shutdown,
    .sendmsg =  unix_dgram_sendmsg,
    .read_skb = unix_read_skb,
    .recvmsg =  unix_dgram_recvmsg,
    .mmap =     sock_no_mmap,
    .sendpage = sock_no_sendpage,
    .set_peek_off = unix_set_peek_off,
    .show_fdinfo =  unix_show_fdinfo,
};

static const struct proto_ops unix_seqpacket_ops = {
    .family =   PF_UNIX,
    .owner =    THIS_MODULE,
    .release =  unix_release,
    .bind =     unix_bind,
    .connect =  unix_stream_connect,
    .socketpair =   unix_socketpair,
    .accept =   unix_accept,
    .getname =  unix_getname,
    .poll =     unix_dgram_poll,
    .ioctl =    unix_ioctl,
#ifdef CONFIG_COMPAT
    .compat_ioctl = unix_compat_ioctl,
#endif
    .listen =   unix_listen,
    .shutdown = unix_shutdown,
    .sendmsg =  unix_seqpacket_sendmsg,
    .recvmsg =  unix_seqpacket_recvmsg,
    .mmap =     sock_no_mmap,
    .sendpage = sock_no_sendpage,
    .set_peek_off = unix_set_peek_off,
    .show_fdinfo =  unix_show_fdinfo,
};

static void unix_close(struct sock *sk, long timeout)
{
    /* Nothing to do here, unix socket does not need a ->close().
     * This is merely for sockmap.
     */
}

static void unix_unhash(struct sock *sk)
{
    /* Nothing to do here, unix socket does not need a ->unhash().
     * This is merely for sockmap.
     */
}

struct proto unix_dgram_proto = {
    .name           = "UNIX",
    .owner          = THIS_MODULE,
    .obj_size       = sizeof(struct unix_sock),
    .close          = unix_close,
#ifdef CONFIG_BPF_SYSCALL
    .psock_update_sk_prot   = unix_dgram_bpf_update_proto,
#endif
};

struct proto unix_stream_proto = {
    .name           = "UNIX-STREAM",
    .owner          = THIS_MODULE,
    .obj_size       = sizeof(struct unix_sock),
    .close          = unix_close,
    .unhash         = unix_unhash,
#ifdef CONFIG_BPF_SYSCALL
    .psock_update_sk_prot   = unix_stream_bpf_update_proto,
#endif
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
    struct unix_sock *u;
    struct sock *sk;
    int err;

    atomic_long_inc(&unix_nr_socks);
    if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
        err = -ENFILE;
        goto err;
    }

    if (type == SOCK_STREAM)
        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
    else /* dgram and seqpacket */
        sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);

    if (!sk) {
        err = -ENOMEM;
        goto err;
    }

    sock_init_data(sock, sk);

    sk->sk_hash     = unix_unbound_hash(sk);
    sk->sk_allocation   = GFP_KERNEL_ACCOUNT;
    sk->sk_write_space  = unix_write_space;
    sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
    sk->sk_destruct     = unix_sock_destructor;
    u     = unix_sk(sk);
    u->path.dentry = NULL;
    u->path.mnt = NULL;
    spin_lock_init(&u->lock);
    atomic_long_set(&u->inflight, 0);
    INIT_LIST_HEAD(&u->link);
    mutex_init(&u->iolock); /* single task reading lock */
    mutex_init(&u->bindlock); /* single task binding lock */
    init_waitqueue_head(&u->peer_wait);
    init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
    memset(&u->scm_stat, 0, sizeof(struct scm_stat));
    unix_insert_unbound_socket(net, sk);

    sock_prot_inuse_add(net, sk->sk_prot, 1);

    return sk;

err:
    atomic_long_dec(&unix_nr_socks);
    return ERR_PTR(err);
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
               int kern)
{
    struct sock *sk;

    if (protocol && protocol != PF_UNIX)
        return -EPROTONOSUPPORT;

    sock->state = SS_UNCONNECTED;

    switch (sock->type) {
    case SOCK_STREAM:
        sock->ops = &unix_stream_ops;
        break;
        /*
         *  Believe it or not BSD has AF_UNIX, SOCK_RAW though
         *  nothing uses it.
         */
    case SOCK_RAW:
        sock->type = SOCK_DGRAM;
        fallthrough;
    case SOCK_DGRAM:
        sock->ops = &unix_dgram_ops;
        break;
    case SOCK_SEQPACKET:
        sock->ops = &unix_seqpacket_ops;
        break;
    default:
        return -ESOCKTNOSUPPORT;
    }

    sk = unix_create1(net, sock, kern, sock->type);
    if (IS_ERR(sk))
        return PTR_ERR(sk);

    return 0;
}
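
/* Type/protocol handling in unix_create(), illustrated from userspace:
 *
 *  socket(AF_UNIX, SOCK_STREAM, 0)            -> unix_stream_ops
 *  socket(AF_UNIX, SOCK_RAW, 0)               -> silently becomes SOCK_DGRAM
 *  socket(AF_UNIX, SOCK_SEQPACKET, 0)         -> unix_seqpacket_ops
 *  socket(AF_UNIX, SOCK_STREAM, IPPROTO_TCP)  -> -EPROTONOSUPPORT
 *                                                (only 0 or PF_UNIX allowed)
 *  socket(AF_UNIX, SOCK_RDM, 0)               -> -ESOCKTNOSUPPORT
 */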

static int unix_release(struct socket *sock)
{
    struct sock *sk = sock->sk;

    if (!sk)
        return 0;

    sk->sk_prot->close(sk, 0);
    unix_release_sock(sk, 0);
    sock->sk = NULL;

    return 0;
}

static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
                  int type)
{
    struct inode *inode;
    struct path path;
    struct sock *sk;
    int err;

    unix_mkname_bsd(sunaddr, addr_len);
    err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
    if (err)
        goto fail;

    err = path_permission(&path, MAY_WRITE);
    if (err)
        goto path_put;

    err = -ECONNREFUSED;
    inode = d_backing_inode(path.dentry);
    if (!S_ISSOCK(inode->i_mode))
        goto path_put;

    sk = unix_find_socket_byinode(inode);
    if (!sk)
        goto path_put;

    err = -EPROTOTYPE;
    if (sk->sk_type == type)
        touch_atime(&path);
    else
        goto sock_put;

    path_put(&path);

    return sk;

sock_put:
    sock_put(sk);
path_put:
    path_put(&path);
fail:
    return ERR_PTR(err);
}

static struct sock *unix_find_abstract(struct net *net,
                       struct sockaddr_un *sunaddr,
                       int addr_len, int type)
{
    unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
    struct dentry *dentry;
    struct sock *sk;

    sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
    if (!sk)
        return ERR_PTR(-ECONNREFUSED);

    dentry = unix_sk(sk)->path.dentry;
    if (dentry)
        touch_atime(&unix_sk(sk)->path);

    return sk;
}

static struct sock *unix_find_other(struct net *net,
                    struct sockaddr_un *sunaddr,
                    int addr_len, int type)
{
    struct sock *sk;

    if (sunaddr->sun_path[0])
        sk = unix_find_bsd(sunaddr, addr_len, type);
    else
        sk = unix_find_abstract(net, sunaddr, addr_len, type);

    return sk;
}

static int unix_autobind(struct sock *sk)
{
    unsigned int new_hash, old_hash = sk->sk_hash;
    struct unix_sock *u = unix_sk(sk);
    struct net *net = sock_net(sk);
    struct unix_address *addr;
    u32 lastnum, ordernum;
    int err;

    err = mutex_lock_interruptible(&u->bindlock);
    if (err)
        return err;

    if (u->addr)
        goto out;

    err = -ENOMEM;
    addr = kzalloc(sizeof(*addr) +
               offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
    if (!addr)
        goto out;

    addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
    addr->name->sun_family = AF_UNIX;
    refcount_set(&addr->refcnt, 1);

    ordernum = prandom_u32();
    lastnum = ordernum & 0xFFFFF;
retry:
    ordernum = (ordernum + 1) & 0xFFFFF;
    sprintf(addr->name->sun_path + 1, "%05x", ordernum);

    new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
    unix_table_double_lock(net, old_hash, new_hash);

    if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
        unix_table_double_unlock(net, old_hash, new_hash);

        /* __unix_find_socket_byname() may take a long time if many
         * names are already in use.
         */
        cond_resched();

        if (ordernum == lastnum) {
            /* Give up if all names seem to be in use. */
            err = -ENOSPC;
            unix_release_addr(addr);
            goto out;
        }

        goto retry;
    }

    __unix_set_addr_hash(net, sk, addr, new_hash);
    unix_table_double_unlock(net, old_hash, new_hash);
    err = 0;

out:    mutex_unlock(&u->bindlock);
    return err;
}
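
/* Autobind sketch: binding with only the family triggers autobind, as
 * does connecting or sending on an unbound socket with SOCK_PASSCRED
 * set (see unix_dgram_connect() below).  The assigned name is abstract,
 * "\0" followed by five lowercase hex digits (addr->len ends up as
 * offsetof(struct sockaddr_un, sun_path) + 6).  From userspace:
 *
 *  struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *  int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *
 *  bind(fd, (struct sockaddr *)&sun, sizeof(sa_family_t));  // autobind
 */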

static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
             int addr_len)
{
    umode_t mode = S_IFSOCK |
           (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
    unsigned int new_hash, old_hash = sk->sk_hash;
    struct unix_sock *u = unix_sk(sk);
    struct net *net = sock_net(sk);
    struct user_namespace *ns; // barf...
    struct unix_address *addr;
    struct dentry *dentry;
    struct path parent;
    int err;

    unix_mkname_bsd(sunaddr, addr_len);
    addr_len = strlen(sunaddr->sun_path) +
        offsetof(struct sockaddr_un, sun_path) + 1;

    addr = unix_create_addr(sunaddr, addr_len);
    if (!addr)
        return -ENOMEM;

    /*
     * Get the parent directory, calculate the hash for the last
     * component.
     */
    dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
    if (IS_ERR(dentry)) {
        err = PTR_ERR(dentry);
        goto out;
    }

    /*
     * All right, let's create it.
     */
    ns = mnt_user_ns(parent.mnt);
    err = security_path_mknod(&parent, dentry, mode, 0);
    if (!err)
        err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
    if (err)
        goto out_path;
    err = mutex_lock_interruptible(&u->bindlock);
    if (err)
        goto out_unlink;
    if (u->addr)
        goto out_unlock;

    new_hash = unix_bsd_hash(d_backing_inode(dentry));
    unix_table_double_lock(net, old_hash, new_hash);
    u->path.mnt = mntget(parent.mnt);
    u->path.dentry = dget(dentry);
    __unix_set_addr_hash(net, sk, addr, new_hash);
    unix_table_double_unlock(net, old_hash, new_hash);
    unix_insert_bsd_socket(sk);
    mutex_unlock(&u->bindlock);
    done_path_create(&parent, dentry);
    return 0;

out_unlock:
    mutex_unlock(&u->bindlock);
    err = -EINVAL;
out_unlink:
    /* failed after successful mknod?  unlink what we'd created... */
    vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
out_path:
    done_path_create(&parent, dentry);
out:
    unix_release_addr(addr);
    return err == -EEXIST ? -EADDRINUSE : err;
}
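
/* A filesystem bind creates the socket inode with mknod semantics, so
 * the name persists after close().  Rebinding the same path therefore
 * fails:
 *
 *  bind(fd, ...);      // creates /tmp/demo.sock
 *  close(fd);
 *  bind(fd2, ...);     // -EADDRINUSE (mknod sees EEXIST, remapped above)
 *
 * Userspace must unlink() the stale path first; the kernel never
 * removes it.
 */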

static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
                  int addr_len)
{
    unsigned int new_hash, old_hash = sk->sk_hash;
    struct unix_sock *u = unix_sk(sk);
    struct net *net = sock_net(sk);
    struct unix_address *addr;
    int err;

    addr = unix_create_addr(sunaddr, addr_len);
    if (!addr)
        return -ENOMEM;

    err = mutex_lock_interruptible(&u->bindlock);
    if (err)
        goto out;

    if (u->addr) {
        err = -EINVAL;
        goto out_mutex;
    }

    new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
    unix_table_double_lock(net, old_hash, new_hash);

    if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
        goto out_spin;

    __unix_set_addr_hash(net, sk, addr, new_hash);
    unix_table_double_unlock(net, old_hash, new_hash);
    mutex_unlock(&u->bindlock);
    return 0;

out_spin:
    unix_table_double_unlock(net, old_hash, new_hash);
    err = -EADDRINUSE;
out_mutex:
    mutex_unlock(&u->bindlock);
out:
    unix_release_addr(addr);
    return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
    struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
    struct sock *sk = sock->sk;
    int err;

    if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
        sunaddr->sun_family == AF_UNIX)
        return unix_autobind(sk);

    err = unix_validate_addr(sunaddr, addr_len);
    if (err)
        return err;

    if (sunaddr->sun_path[0])
        err = unix_bind_bsd(sk, sunaddr, addr_len);
    else
        err = unix_bind_abstract(sk, sunaddr, addr_len);

    return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
    if (unlikely(sk1 == sk2) || !sk2) {
        unix_state_lock(sk1);
        return;
    }
    if (sk1 < sk2) {
        unix_state_lock(sk1);
        unix_state_lock_nested(sk2);
    } else {
        unix_state_lock(sk2);
        unix_state_lock_nested(sk1);
    }
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
    if (unlikely(sk1 == sk2) || !sk2) {
        unix_state_unlock(sk1);
        return;
    }
    unix_state_unlock(sk1);
    unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                  int alen, int flags)
{
    struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
    struct sock *sk = sock->sk;
    struct sock *other;
    int err;

    err = -EINVAL;
    if (alen < offsetofend(struct sockaddr, sa_family))
        goto out;

    if (addr->sa_family != AF_UNSPEC) {
        err = unix_validate_addr(sunaddr, alen);
        if (err)
            goto out;

        if (test_bit(SOCK_PASSCRED, &sock->flags) &&
            !unix_sk(sk)->addr) {
            err = unix_autobind(sk);
            if (err)
                goto out;
        }

restart:
        other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
        if (IS_ERR(other)) {
            err = PTR_ERR(other);
            goto out;
        }

        unix_state_double_lock(sk, other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
            unix_state_double_unlock(sk, other);
            sock_put(other);
            goto restart;
        }

        err = -EPERM;
        if (!unix_may_send(sk, other))
            goto out_unlock;

        err = security_unix_may_send(sk->sk_socket, other->sk_socket);
        if (err)
            goto out_unlock;

        sk->sk_state = other->sk_state = TCP_ESTABLISHED;
    } else {
        /*
         *  1003.1g breaking connected state with AF_UNSPEC
         */
        other = NULL;
        unix_state_double_lock(sk, other);
    }

    /*
     * If it was connected, reconnect.
     */
    if (unix_peer(sk)) {
        struct sock *old_peer = unix_peer(sk);

        unix_peer(sk) = other;
        if (!other)
            sk->sk_state = TCP_CLOSE;
        unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

        unix_state_double_unlock(sk, other);

        if (other != old_peer)
            unix_dgram_disconnected(sk, old_peer);
        sock_put(old_peer);
    } else {
        unix_peer(sk) = other;
        unix_state_double_unlock(sk, other);
    }

    return 0;

out_unlock:
    unix_state_double_unlock(sk, other);
    sock_put(other);
out:
    return err;
}
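
/* Datagram connect/disconnect sketch.  Connecting filters the receive
 * queue to one peer; connecting with sa_family AF_UNSPEC (the 1003.1g
 * case handled above) dissolves the association:
 *
 *  struct sockaddr_un srv = { .sun_family = AF_UNIX };
 *  struct sockaddr    any = { .sa_family  = AF_UNSPEC };
 *  int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *
 *  strcpy(srv.sun_path, "/run/demo.sock");             // hypothetical path
 *  connect(fd, (struct sockaddr *)&srv, sizeof(srv));  // peer set
 *  connect(fd, &any, sizeof(any));                     // peer cleared
 */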

static long unix_wait_for_peer(struct sock *other, long timeo)
    __releases(&unix_sk(other)->lock)
{
    struct unix_sock *u = unix_sk(other);
    int sched;
    DEFINE_WAIT(wait);

    prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

    sched = !sock_flag(other, SOCK_DEAD) &&
        !(other->sk_shutdown & RCV_SHUTDOWN) &&
        unix_recvq_full(other);

    unix_state_unlock(other);

    if (sched)
        timeo = schedule_timeout(timeo);

    finish_wait(&u->peer_wait, &wait);
    return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                   int addr_len, int flags)
{
    struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
    struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
    struct unix_sock *u = unix_sk(sk), *newu, *otheru;
    struct net *net = sock_net(sk);
    struct sk_buff *skb = NULL;
    long timeo;
    int err;
    int st;

    err = unix_validate_addr(sunaddr, addr_len);
    if (err)
        goto out;

    if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
        err = unix_autobind(sk);
        if (err)
            goto out;
    }

    timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

    /* First of all allocate resources.
       If we did it after the state was locked,
       we would have to recheck everything again in any case.
     */

    /* create new sock for complete connection */
    newsk = unix_create1(net, NULL, 0, sock->type);
    if (IS_ERR(newsk)) {
        err = PTR_ERR(newsk);
        newsk = NULL;
        goto out;
    }

    err = -ENOMEM;

    /* Allocate skb for sending to listening sock */
    skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
    if (skb == NULL)
        goto out;

restart:
    /*  Find listening sock. */
    other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
    if (IS_ERR(other)) {
        err = PTR_ERR(other);
        other = NULL;
        goto out;
    }

    /* Latch state of peer */
    unix_state_lock(other);

    /* Apparently VFS overslept socket death. Retry. */
    if (sock_flag(other, SOCK_DEAD)) {
        unix_state_unlock(other);
        sock_put(other);
        goto restart;
    }

    err = -ECONNREFUSED;
    if (other->sk_state != TCP_LISTEN)
        goto out_unlock;
    if (other->sk_shutdown & RCV_SHUTDOWN)
        goto out_unlock;

    if (unix_recvq_full(other)) {
        err = -EAGAIN;
        if (!timeo)
            goto out_unlock;

        timeo = unix_wait_for_peer(other, timeo);

        err = sock_intr_errno(timeo);
        if (signal_pending(current))
            goto out;
        sock_put(other);
        goto restart;
    }

    /* Latch our state.

       This is a tricky place. We need to grab our state lock but
       cannot drop the lock on the peer, which is dangerous because
       deadlock is possible. The connect-to-self case and simultaneous
       connect attempts are eliminated by checking socket
       state: other is TCP_LISTEN, and if sk were TCP_LISTEN we
       would have caught that before attempting to grab the lock.

       And we have to recheck the state after the socket is locked.
     */
    st = sk->sk_state;

    switch (st) {
    case TCP_CLOSE:
        /* This is ok... continue with connect */
        break;
    case TCP_ESTABLISHED:
        /* Socket is already connected */
        err = -EISCONN;
        goto out_unlock;
    default:
        err = -EINVAL;
        goto out_unlock;
    }

    unix_state_lock_nested(sk);

    if (sk->sk_state != st) {
        unix_state_unlock(sk);
        unix_state_unlock(other);
        sock_put(other);
        goto restart;
    }

    err = security_unix_stream_connect(sk, other, newsk);
    if (err) {
        unix_state_unlock(sk);
        goto out_unlock;
    }

    /* The way is open! Quickly set all the necessary fields... */

    sock_hold(sk);
    unix_peer(newsk)    = sk;
    newsk->sk_state     = TCP_ESTABLISHED;
    newsk->sk_type      = sk->sk_type;
    init_peercred(newsk);
    newu = unix_sk(newsk);
    RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
    otheru = unix_sk(other);

    /* copy address information from listening to new sock
     *
     * The contents of *(otheru->addr) and otheru->path
     * are seen fully set up here, since we have found
     * otheru in hash under its lock.  Insertion into the
     * hash chain we'd found it in had been done in an
     * earlier critical area protected by the chain's lock,
     * the same one where we'd set *(otheru->addr) contents,
     * as well as otheru->path and otheru->addr itself.
     *
     * Using smp_store_release() here to set newu->addr
     * is enough to make those stores, as well as stores
     * to newu->path visible to anyone who gets newu->addr
     * by smp_load_acquire().  IOW, the same guarantees
     * as for unix_sock instances bound in unix_bind() or
     * in unix_autobind().
     */
    if (otheru->path.dentry) {
        path_get(&otheru->path);
        newu->path = otheru->path;
    }
    refcount_inc(&otheru->addr->refcnt);
    smp_store_release(&newu->addr, otheru->addr);

    /* Set credentials */
    copy_peercred(sk, other);

    sock->state = SS_CONNECTED;
    sk->sk_state    = TCP_ESTABLISHED;
    sock_hold(newsk);

    smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
    unix_peer(sk)   = newsk;

    unix_state_unlock(sk);

    /* take ten and send info to listening sock */
    spin_lock(&other->sk_receive_queue.lock);
    __skb_queue_tail(&other->sk_receive_queue, skb);
    spin_unlock(&other->sk_receive_queue.lock);
    unix_state_unlock(other);
    other->sk_data_ready(other);
    sock_put(other);
    return 0;

out_unlock:
    if (other)
        unix_state_unlock(other);

out:
    kfree_skb(skb);
    if (newsk)
        unix_release_sock(newsk, 0);
    if (other)
        sock_put(other);
    return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
    struct sock *ska = socka->sk, *skb = sockb->sk;

    /* Join our sockets back to back */
    sock_hold(ska);
    sock_hold(skb);
    unix_peer(ska) = skb;
    unix_peer(skb) = ska;
    init_peercred(ska);
    init_peercred(skb);

    ska->sk_state = TCP_ESTABLISHED;
    skb->sk_state = TCP_ESTABLISHED;
    socka->state  = SS_CONNECTED;
    sockb->state  = SS_CONNECTED;
    return 0;
}
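
/* socketpair() joins two fresh sockets back to back with no name at
 * all, so neither end can be looked up by address.  Typical usage:
 *
 *  int sv[2];
 *
 *  socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
 *  // sv[0] and sv[1] are connected peers; SO_PEERCRED works on both
 *  // because init_peercred() ran for each side above.
 */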
1635 
1636 static void unix_sock_inherit_flags(const struct socket *old,
1637                     struct socket *new)
1638 {
1639     if (test_bit(SOCK_PASSCRED, &old->flags))
1640         set_bit(SOCK_PASSCRED, &new->flags);
1641     if (test_bit(SOCK_PASSSEC, &old->flags))
1642         set_bit(SOCK_PASSSEC, &new->flags);
1643 }
1644 
1645 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1646                bool kern)
1647 {
1648     struct sock *sk = sock->sk;
1649     struct sock *tsk;
1650     struct sk_buff *skb;
1651     int err;
1652 
1653     err = -EOPNOTSUPP;
1654     if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1655         goto out;
1656 
1657     err = -EINVAL;
1658     if (sk->sk_state != TCP_LISTEN)
1659         goto out;
1660 
1661     /* If socket state is TCP_LISTEN it cannot change (for now...),
1662      * so that no locks are necessary.
1663      */
1664 
1665     skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1666                 &err);
1667     if (!skb) {
1668         /* This means receive shutdown. */
1669         if (err == 0)
1670             err = -EINVAL;
1671         goto out;
1672     }
1673 
1674     tsk = skb->sk;
1675     skb_free_datagram(sk, skb);
1676     wake_up_interruptible(&unix_sk(sk)->peer_wait);
1677 
1678     /* attach accepted sock to socket */
1679     unix_state_lock(tsk);
1680     newsock->state = SS_CONNECTED;
1681     unix_sock_inherit_flags(sock, newsock);
1682     sock_graft(tsk, newsock);
1683     unix_state_unlock(tsk);
1684     return 0;
1685 
1686 out:
1687     return err;
1688 }
1689 
1690 
1691 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1692 {
1693     struct sock *sk = sock->sk;
1694     struct unix_address *addr;
1695     DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1696     int err = 0;
1697 
1698     if (peer) {
1699         sk = unix_peer_get(sk);
1700 
1701         err = -ENOTCONN;
1702         if (!sk)
1703             goto out;
1704         err = 0;
1705     } else {
1706         sock_hold(sk);
1707     }
1708 
1709     addr = smp_load_acquire(&unix_sk(sk)->addr);
1710     if (!addr) {
1711         sunaddr->sun_family = AF_UNIX;
1712         sunaddr->sun_path[0] = 0;
1713         err = offsetof(struct sockaddr_un, sun_path);
1714     } else {
1715         err = addr->len;
1716         memcpy(sunaddr, addr->name, addr->len);
1717     }
1718     sock_put(sk);
1719 out:
1720     return err;
1721 }
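/*
 * Editor's note: a small userspace sketch (not part of this file) of
 * what unix_getname() reports.  For an unbound socket the returned
 * length is just offsetof(struct sockaddr_un, sun_path): the family
 * field with a zero-length path, matching the !addr branch above.
 */
#if 0 /* illustrative userspace example */
#include <sys/socket.h>
#include <sys/un.h>
#include <stddef.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_un sun;
	socklen_t len = sizeof(sun);
	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);

	getsockname(fd, (struct sockaddr *)&sun, &len);
	/* Unbound socket: len == offsetof(struct sockaddr_un, sun_path). */
	printf("len=%u family=%d\n", (unsigned int)len, sun.sun_family);
	close(fd);
	return 0;
}
#endif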
1722 
1723 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1724 {
1725     scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1726 
1727     /*
1728      * Garbage collection of unix sockets starts by selecting a set of
1729      * candidate sockets which have reference only from being in flight
1730      * (total_refs == inflight_refs).  This condition is checked once during
1731      * the candidate collection phase, and candidates are marked as such, so
1732      * that non-candidates can later be ignored.  While inflight_refs is
1733      * protected by unix_gc_lock, total_refs (file count) is not, hence this
1734      * is an instantaneous decision.
1735      *
1736      * Once a candidate, however, the socket must not be reinstalled into a
1737      * file descriptor while the garbage collection is in progress.
1738      *
1739      * If the above conditions are met, then the directed graph of
1740      * candidates (*) does not change while unix_gc_lock is held.
1741      *
1742      * Any operation that changes the file count through file descriptors
1743      * (dup, close, sendmsg) does not change the graph since candidates are
1744      * not installed in fds.
1745      *
1746      * Dequeuing a candidate via recvmsg would install it into an fd, but
1747      * that takes unix_gc_lock to decrement the inflight count, so it's
1748      * serialized with garbage collection.
1749      *
1750      * MSG_PEEK is special in that it does not change the inflight count,
1751      * yet does install the socket into an fd.  The following lock/unlock
1752      * pair is to ensure serialization with garbage collection.  It must be
1753      * done between incrementing the file count and installing the file into
1754      * an fd.
1755      *
1756      * If garbage collection starts after the barrier provided by the
1757      * lock/unlock, then it will see the elevated refcount and not mark this
1758      * as a candidate.  If a garbage collection is already in progress
1759      * before the file count was incremented, then the lock/unlock pair will
1760      * ensure that garbage collection is finished before progressing to
1761      * installing the fd.
1762      *
1763      * (*) A -> B where B is on the queue of A or B is on the queue of C
1764      * which is on the queue of listening socket A.
1765      */
1766     spin_lock(&unix_gc_lock);
1767     spin_unlock(&unix_gc_lock);
1768 }
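/*
 * Editor's note: the MSG_PEEK case described above is reached when a
 * receiver peeks at a message carrying SCM_RIGHTS.  A hedged userspace
 * sketch (peek_with_fds is a hypothetical helper, not part of this
 * file): each peek duplicates the attached fds into the caller, and
 * the unix_peek_fds() barrier above serializes that against the GC.
 */
#if 0 /* illustrative userspace example */
#include <sys/socket.h>
#include <sys/uio.h>

static ssize_t peek_with_fds(int sock)
{
	char data[128], ctrl[CMSG_SPACE(sizeof(int) * 4)];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
	};

	/* Message stays queued; any SCM_RIGHTS fds are still installed. */
	return recvmsg(sock, &msg, MSG_PEEK);
}
#endif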
1769 
1770 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1771 {
1772     int err = 0;
1773 
1774     UNIXCB(skb).pid  = get_pid(scm->pid);
1775     UNIXCB(skb).uid = scm->creds.uid;
1776     UNIXCB(skb).gid = scm->creds.gid;
1777     UNIXCB(skb).fp = NULL;
1778     unix_get_secdata(scm, skb);
1779     if (scm->fp && send_fds)
1780         err = unix_attach_fds(scm, skb);
1781 
1782     skb->destructor = unix_destruct_scm;
1783     return err;
1784 }
1785 
1786 static bool unix_passcred_enabled(const struct socket *sock,
1787                   const struct sock *other)
1788 {
1789     return test_bit(SOCK_PASSCRED, &sock->flags) ||
1790            !other->sk_socket ||
1791            test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1792 }
1793 
1794 /*
1795  * Some apps rely on write() giving SCM_CREDENTIALS.
1796  * We include credentials if the source or destination socket
1797  * asserted SOCK_PASSCRED.
1798  */
1799 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1800                 const struct sock *other)
1801 {
1802     if (UNIXCB(skb).pid)
1803         return;
1804     if (unix_passcred_enabled(sock, other)) {
1805         UNIXCB(skb).pid  = get_pid(task_tgid(current));
1806         current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1807     }
1808 }
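/*
 * Editor's note: maybe_add_creds() is what makes plain write()s carry
 * SCM_CREDENTIALS once either end sets SO_PASSCRED.  A minimal sketch
 * (enable_and_read_creds is a hypothetical helper, not part of this
 * file):
 */
#if 0 /* illustrative userspace example */
#define _GNU_SOURCE	/* for struct ucred */
#include <sys/socket.h>
#include <sys/uio.h>
#include <stdio.h>

static void enable_and_read_creds(int sock)
{
	int on = 1;
	char data[64], ctrl[CMSG_SPACE(sizeof(struct ucred))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
	};
	struct cmsghdr *cmsg;

	setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
	if (recvmsg(sock, &msg, 0) < 0)
		return;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_CREDENTIALS) {
			struct ucred *uc = (struct ucred *)CMSG_DATA(cmsg);

			printf("pid=%d uid=%u gid=%u\n",
			       uc->pid, uc->uid, uc->gid);
		}
}
#endif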
1809 
1810 static int maybe_init_creds(struct scm_cookie *scm,
1811                 struct socket *socket,
1812                 const struct sock *other)
1813 {
1814     int err;
1815     struct msghdr msg = { .msg_controllen = 0 };
1816 
1817     err = scm_send(socket, &msg, scm, false);
1818     if (err)
1819         return err;
1820 
1821     if (unix_passcred_enabled(socket, other)) {
1822         scm->pid = get_pid(task_tgid(current));
1823         current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1824     }
1825     return err;
1826 }
1827 
1828 static bool unix_skb_scm_eq(struct sk_buff *skb,
1829                 struct scm_cookie *scm)
1830 {
1831     return UNIXCB(skb).pid == scm->pid &&
1832            uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1833            gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
1834            unix_secdata_eq(scm, skb);
1835 }
1836 
1837 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1838 {
1839     struct scm_fp_list *fp = UNIXCB(skb).fp;
1840     struct unix_sock *u = unix_sk(sk);
1841 
1842     if (unlikely(fp && fp->count))
1843         atomic_add(fp->count, &u->scm_stat.nr_fds);
1844 }
1845 
1846 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1847 {
1848     struct scm_fp_list *fp = UNIXCB(skb).fp;
1849     struct unix_sock *u = unix_sk(sk);
1850 
1851     if (unlikely(fp && fp->count))
1852         atomic_sub(fp->count, &u->scm_stat.nr_fds);
1853 }
1854 
1855 /*
1856  *  Send AF_UNIX data.
1857  */
1858 
1859 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1860                   size_t len)
1861 {
1862     DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1863     struct sock *sk = sock->sk, *other = NULL;
1864     struct unix_sock *u = unix_sk(sk);
1865     struct scm_cookie scm;
1866     struct sk_buff *skb;
1867     int data_len = 0;
1868     int sk_locked;
1869     long timeo;
1870     int err;
1871 
1872     wait_for_unix_gc();
1873     err = scm_send(sock, msg, &scm, false);
1874     if (err < 0)
1875         return err;
1876 
1877     err = -EOPNOTSUPP;
1878     if (msg->msg_flags&MSG_OOB)
1879         goto out;
1880 
1881     if (msg->msg_namelen) {
1882         err = unix_validate_addr(sunaddr, msg->msg_namelen);
1883         if (err)
1884             goto out;
1885     } else {
1886         sunaddr = NULL;
1887         err = -ENOTCONN;
1888         other = unix_peer_get(sk);
1889         if (!other)
1890             goto out;
1891     }
1892 
1893     if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
1894         err = unix_autobind(sk);
1895         if (err)
1896             goto out;
1897     }
1898 
1899     err = -EMSGSIZE;
1900     if (len > sk->sk_sndbuf - 32)
1901         goto out;
1902 
1903     if (len > SKB_MAX_ALLOC) {
1904         data_len = min_t(size_t,
1905                  len - SKB_MAX_ALLOC,
1906                  MAX_SKB_FRAGS * PAGE_SIZE);
1907         data_len = PAGE_ALIGN(data_len);
1908 
1909         BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1910     }
1911 
1912     skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1913                    msg->msg_flags & MSG_DONTWAIT, &err,
1914                    PAGE_ALLOC_COSTLY_ORDER);
1915     if (skb == NULL)
1916         goto out;
1917 
1918     err = unix_scm_to_skb(&scm, skb, true);
1919     if (err < 0)
1920         goto out_free;
1921 
1922     skb_put(skb, len - data_len);
1923     skb->data_len = data_len;
1924     skb->len = len;
1925     err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1926     if (err)
1927         goto out_free;
1928 
1929     timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1930 
1931 restart:
1932     if (!other) {
1933         err = -ECONNRESET;
1934         if (sunaddr == NULL)
1935             goto out_free;
1936 
1937         other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
1938                     sk->sk_type);
1939         if (IS_ERR(other)) {
1940             err = PTR_ERR(other);
1941             other = NULL;
1942             goto out_free;
1943         }
1944     }
1945 
1946     if (sk_filter(other, skb) < 0) {
1947         /* Toss the packet but do not return any error to the sender */
1948         err = len;
1949         goto out_free;
1950     }
1951 
1952     sk_locked = 0;
1953     unix_state_lock(other);
1954 restart_locked:
1955     err = -EPERM;
1956     if (!unix_may_send(sk, other))
1957         goto out_unlock;
1958 
1959     if (unlikely(sock_flag(other, SOCK_DEAD))) {
1960         /*
1961          *  Check with 1003.1g - what should a
1962          *  datagram error return here?
1963          */
1964         unix_state_unlock(other);
1965         sock_put(other);
1966 
1967         if (!sk_locked)
1968             unix_state_lock(sk);
1969 
1970         err = 0;
1971         if (unix_peer(sk) == other) {
1972             unix_peer(sk) = NULL;
1973             unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1974 
1975             unix_state_unlock(sk);
1976 
1977             sk->sk_state = TCP_CLOSE;
1978             unix_dgram_disconnected(sk, other);
1979             sock_put(other);
1980             err = -ECONNREFUSED;
1981         } else {
1982             unix_state_unlock(sk);
1983         }
1984 
1985         other = NULL;
1986         if (err)
1987             goto out_free;
1988         goto restart;
1989     }
1990 
1991     err = -EPIPE;
1992     if (other->sk_shutdown & RCV_SHUTDOWN)
1993         goto out_unlock;
1994 
1995     if (sk->sk_type != SOCK_SEQPACKET) {
1996         err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1997         if (err)
1998             goto out_unlock;
1999     }
2000 
2001     /* other == sk && unix_peer(other) != sk if
2002      * - unix_peer(sk) == NULL, destination address bound to sk
2003      * - unix_peer(sk) == sk by time of get but disconnected before lock
2004      */
2005     if (other != sk &&
2006         unlikely(unix_peer(other) != sk &&
2007         unix_recvq_full_lockless(other))) {
2008         if (timeo) {
2009             timeo = unix_wait_for_peer(other, timeo);
2010 
2011             err = sock_intr_errno(timeo);
2012             if (signal_pending(current))
2013                 goto out_free;
2014 
2015             goto restart;
2016         }
2017 
2018         if (!sk_locked) {
2019             unix_state_unlock(other);
2020             unix_state_double_lock(sk, other);
2021         }
2022 
2023         if (unix_peer(sk) != other ||
2024             unix_dgram_peer_wake_me(sk, other)) {
2025             err = -EAGAIN;
2026             sk_locked = 1;
2027             goto out_unlock;
2028         }
2029 
2030         if (!sk_locked) {
2031             sk_locked = 1;
2032             goto restart_locked;
2033         }
2034     }
2035 
2036     if (unlikely(sk_locked))
2037         unix_state_unlock(sk);
2038 
2039     if (sock_flag(other, SOCK_RCVTSTAMP))
2040         __net_timestamp(skb);
2041     maybe_add_creds(skb, sock, other);
2042     scm_stat_add(other, skb);
2043     skb_queue_tail(&other->sk_receive_queue, skb);
2044     unix_state_unlock(other);
2045     other->sk_data_ready(other);
2046     sock_put(other);
2047     scm_destroy(&scm);
2048     return len;
2049 
2050 out_unlock:
2051     if (sk_locked)
2052         unix_state_unlock(sk);
2053     unix_state_unlock(other);
2054 out_free:
2055     kfree_skb(skb);
2056 out:
2057     if (other)
2058         sock_put(other);
2059     scm_destroy(&scm);
2060     return err;
2061 }
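/*
 * Editor's note: a minimal sketch (dgram_send is a hypothetical
 * helper, not part of this file) of the datagram send path above.
 * Per the check above, a send larger than sk_sndbuf - 32 fails with
 * EMSGSIZE rather than being fragmented.
 */
#if 0 /* illustrative userspace example */
#include <sys/socket.h>
#include <sys/un.h>
#include <string.h>

static ssize_t dgram_send(int sock, const char *path,
			  const void *buf, size_t len)
{
	struct sockaddr_un sun = { .sun_family = AF_UNIX };

	strncpy(sun.sun_path, path, sizeof(sun.sun_path) - 1);
	return sendto(sock, buf, len, 0,
		      (struct sockaddr *)&sun, sizeof(sun));
}
#endif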
2062 
2063 /* We use paged skbs for stream sockets, and limit occupancy to 32768
2064  * bytes, with a minimum of a full page.
2065  */
2066 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
2067 
2068 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2069 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
2070 {
2071     struct unix_sock *ousk = unix_sk(other);
2072     struct sk_buff *skb;
2073     int err = 0;
2074 
2075     skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2076 
2077     if (!skb)
2078         return err;
2079 
2080     skb_put(skb, 1);
2081     err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2082 
2083     if (err) {
2084         kfree_skb(skb);
2085         return err;
2086     }
2087 
2088     unix_state_lock(other);
2089 
2090     if (sock_flag(other, SOCK_DEAD) ||
2091         (other->sk_shutdown & RCV_SHUTDOWN)) {
2092         unix_state_unlock(other);
2093         kfree_skb(skb);
2094         return -EPIPE;
2095     }
2096 
2097     maybe_add_creds(skb, sock, other);
2098     skb_get(skb);
2099 
2100     if (ousk->oob_skb)
2101         consume_skb(ousk->oob_skb);
2102 
2103     WRITE_ONCE(ousk->oob_skb, skb);
2104 
2105     scm_stat_add(other, skb);
2106     skb_queue_tail(&other->sk_receive_queue, skb);
2107     sk_send_sigurg(other);
2108     unix_state_unlock(other);
2109     other->sk_data_ready(other);
2110 
2111     return err;
2112 }
2113 #endif
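/*
 * Editor's note: with CONFIG_AF_UNIX_OOB, the last byte of a MSG_OOB
 * send is diverted through queue_oob() above.  A hedged sketch
 * (oob_roundtrip is a hypothetical helper, not part of this file):
 */
#if 0 /* illustrative userspace example */
#include <sys/socket.h>

static void oob_roundtrip(int a, int b)
{
	char c;

	send(a, "x", 1, MSG_OOB);	/* stored in ousk->oob_skb */
	recv(b, &c, 1, MSG_OOB);	/* fetched via unix_stream_recv_urg() */
}
#endif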
2114 
2115 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2116                    size_t len)
2117 {
2118     struct sock *sk = sock->sk;
2119     struct sock *other = NULL;
2120     int err, size;
2121     struct sk_buff *skb;
2122     int sent = 0;
2123     struct scm_cookie scm;
2124     bool fds_sent = false;
2125     int data_len;
2126 
2127     wait_for_unix_gc();
2128     err = scm_send(sock, msg, &scm, false);
2129     if (err < 0)
2130         return err;
2131 
2132     err = -EOPNOTSUPP;
2133     if (msg->msg_flags & MSG_OOB) {
2134 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2135         if (len)
2136             len--;
2137         else
2138 #endif
2139             goto out_err;
2140     }
2141 
2142     if (msg->msg_namelen) {
2143         err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2144         goto out_err;
2145     } else {
2146         err = -ENOTCONN;
2147         other = unix_peer(sk);
2148         if (!other)
2149             goto out_err;
2150     }
2151 
2152     if (sk->sk_shutdown & SEND_SHUTDOWN)
2153         goto pipe_err;
2154 
2155     while (sent < len) {
2156         size = len - sent;
2157 
2158         /* Keep two messages in the pipe so it schedules better */
2159         size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
2160 
2161         /* allow fallback to order-0 allocations */
2162         size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2163 
2164         data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2165 
2166         data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2167 
2168         skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2169                        msg->msg_flags & MSG_DONTWAIT, &err,
2170                        get_order(UNIX_SKB_FRAGS_SZ));
2171         if (!skb)
2172             goto out_err;
2173 
2174         /* Only send the fds in the first buffer */
2175         err = unix_scm_to_skb(&scm, skb, !fds_sent);
2176         if (err < 0) {
2177             kfree_skb(skb);
2178             goto out_err;
2179         }
2180         fds_sent = true;
2181 
2182         skb_put(skb, size - data_len);
2183         skb->data_len = data_len;
2184         skb->len = size;
2185         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2186         if (err) {
2187             kfree_skb(skb);
2188             goto out_err;
2189         }
2190 
2191         unix_state_lock(other);
2192 
2193         if (sock_flag(other, SOCK_DEAD) ||
2194             (other->sk_shutdown & RCV_SHUTDOWN))
2195             goto pipe_err_free;
2196 
2197         maybe_add_creds(skb, sock, other);
2198         scm_stat_add(other, skb);
2199         skb_queue_tail(&other->sk_receive_queue, skb);
2200         unix_state_unlock(other);
2201         other->sk_data_ready(other);
2202         sent += size;
2203     }
2204 
2205 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2206     if (msg->msg_flags & MSG_OOB) {
2207         err = queue_oob(sock, msg, other);
2208         if (err)
2209             goto out_err;
2210         sent++;
2211     }
2212 #endif
2213 
2214     scm_destroy(&scm);
2215 
2216     return sent;
2217 
2218 pipe_err_free:
2219     unix_state_unlock(other);
2220     kfree_skb(skb);
2221 pipe_err:
2222     if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2223         send_sig(SIGPIPE, current, 0);
2224     err = -EPIPE;
2225 out_err:
2226     scm_destroy(&scm);
2227     return sent ? : err;
2228 }
2229 
2230 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
2231                     int offset, size_t size, int flags)
2232 {
2233     int err;
2234     bool send_sigpipe = false;
2235     bool init_scm = true;
2236     struct scm_cookie scm;
2237     struct sock *other, *sk = socket->sk;
2238     struct sk_buff *skb, *newskb = NULL, *tail = NULL;
2239 
2240     if (flags & MSG_OOB)
2241         return -EOPNOTSUPP;
2242 
2243     other = unix_peer(sk);
2244     if (!other || sk->sk_state != TCP_ESTABLISHED)
2245         return -ENOTCONN;
2246 
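	/* Never true on entry; this block is reached only via "goto alloc_skb" below. */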
2247     if (false) {
2248 alloc_skb:
2249         unix_state_unlock(other);
2250         mutex_unlock(&unix_sk(other)->iolock);
2251         newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
2252                           &err, 0);
2253         if (!newskb)
2254             goto err;
2255     }
2256 
2257     /* we must acquire iolock as we modify already present
2258      * skbs in the sk_receive_queue and mess with skb->len
2259      */
2260     err = mutex_lock_interruptible(&unix_sk(other)->iolock);
2261     if (err) {
2262         err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
2263         goto err;
2264     }
2265 
2266     if (sk->sk_shutdown & SEND_SHUTDOWN) {
2267         err = -EPIPE;
2268         send_sigpipe = true;
2269         goto err_unlock;
2270     }
2271 
2272     unix_state_lock(other);
2273 
2274     if (sock_flag(other, SOCK_DEAD) ||
2275         other->sk_shutdown & RCV_SHUTDOWN) {
2276         err = -EPIPE;
2277         send_sigpipe = true;
2278         goto err_state_unlock;
2279     }
2280 
2281     if (init_scm) {
2282         err = maybe_init_creds(&scm, socket, other);
2283         if (err)
2284             goto err_state_unlock;
2285         init_scm = false;
2286     }
2287 
2288     skb = skb_peek_tail(&other->sk_receive_queue);
2289     if (tail && tail == skb) {
2290         skb = newskb;
2291     } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2292         if (newskb) {
2293             skb = newskb;
2294         } else {
2295             tail = skb;
2296             goto alloc_skb;
2297         }
2298     } else if (newskb) {
2299         /* This is the fast path: the previously allocated newskb
2300          * is not needed, so drop it.  consume_skb() would do no
2301          * harm even if newskb were NULL.
2302          */
2303         consume_skb(newskb);
2304         newskb = NULL;
2305     }
2306 
2307     if (skb_append_pagefrags(skb, page, offset, size)) {
2308         tail = skb;
2309         goto alloc_skb;
2310     }
2311 
2312     skb->len += size;
2313     skb->data_len += size;
2314     skb->truesize += size;
2315     refcount_add(size, &sk->sk_wmem_alloc);
2316 
2317     if (newskb) {
2318         err = unix_scm_to_skb(&scm, skb, false);
2319         if (err)
2320             goto err_state_unlock;
2321         spin_lock(&other->sk_receive_queue.lock);
2322         __skb_queue_tail(&other->sk_receive_queue, newskb);
2323         spin_unlock(&other->sk_receive_queue.lock);
2324     }
2325 
2326     unix_state_unlock(other);
2327     mutex_unlock(&unix_sk(other)->iolock);
2328 
2329     other->sk_data_ready(other);
2330     scm_destroy(&scm);
2331     return size;
2332 
2333 err_state_unlock:
2334     unix_state_unlock(other);
2335 err_unlock:
2336     mutex_unlock(&unix_sk(other)->iolock);
2337 err:
2338     kfree_skb(newskb);
2339     if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2340         send_sig(SIGPIPE, current, 0);
2341     if (!init_scm)
2342         scm_destroy(&scm);
2343     return err;
2344 }
2345 
2346 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2347                   size_t len)
2348 {
2349     int err;
2350     struct sock *sk = sock->sk;
2351 
2352     err = sock_error(sk);
2353     if (err)
2354         return err;
2355 
2356     if (sk->sk_state != TCP_ESTABLISHED)
2357         return -ENOTCONN;
2358 
2359     if (msg->msg_namelen)
2360         msg->msg_namelen = 0;
2361 
2362     return unix_dgram_sendmsg(sock, msg, len);
2363 }
2364 
2365 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2366                   size_t size, int flags)
2367 {
2368     struct sock *sk = sock->sk;
2369 
2370     if (sk->sk_state != TCP_ESTABLISHED)
2371         return -ENOTCONN;
2372 
2373     return unix_dgram_recvmsg(sock, msg, size, flags);
2374 }
2375 
2376 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2377 {
2378     struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2379 
2380     if (addr) {
2381         msg->msg_namelen = addr->len;
2382         memcpy(msg->msg_name, addr->name, addr->len);
2383     }
2384 }
2385 
2386 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2387              int flags)
2388 {
2389     struct scm_cookie scm;
2390     struct socket *sock = sk->sk_socket;
2391     struct unix_sock *u = unix_sk(sk);
2392     struct sk_buff *skb, *last;
2393     long timeo;
2394     int skip;
2395     int err;
2396 
2397     err = -EOPNOTSUPP;
2398     if (flags&MSG_OOB)
2399         goto out;
2400 
2401     timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2402 
2403     do {
2404         mutex_lock(&u->iolock);
2405 
2406         skip = sk_peek_offset(sk, flags);
2407         skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2408                           &skip, &err, &last);
2409         if (skb) {
2410             if (!(flags & MSG_PEEK))
2411                 scm_stat_del(sk, skb);
2412             break;
2413         }
2414 
2415         mutex_unlock(&u->iolock);
2416 
2417         if (err != -EAGAIN)
2418             break;
2419     } while (timeo &&
2420          !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2421                           &err, &timeo, last));
2422 
2423     if (!skb) { /* implies iolock unlocked */
2424         unix_state_lock(sk);
2425         /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2426         if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2427             (sk->sk_shutdown & RCV_SHUTDOWN))
2428             err = 0;
2429         unix_state_unlock(sk);
2430         goto out;
2431     }
2432 
2433     if (wq_has_sleeper(&u->peer_wait))
2434         wake_up_interruptible_sync_poll(&u->peer_wait,
2435                         EPOLLOUT | EPOLLWRNORM |
2436                         EPOLLWRBAND);
2437 
2438     if (msg->msg_name)
2439         unix_copy_addr(msg, skb->sk);
2440 
2441     if (size > skb->len - skip)
2442         size = skb->len - skip;
2443     else if (size < skb->len - skip)
2444         msg->msg_flags |= MSG_TRUNC;
2445 
2446     err = skb_copy_datagram_msg(skb, skip, msg, size);
2447     if (err)
2448         goto out_free;
2449 
2450     if (sock_flag(sk, SOCK_RCVTSTAMP))
2451         __sock_recv_timestamp(msg, sk, skb);
2452 
2453     memset(&scm, 0, sizeof(scm));
2454 
2455     scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2456     unix_set_secdata(&scm, skb);
2457 
2458     if (!(flags & MSG_PEEK)) {
2459         if (UNIXCB(skb).fp)
2460             unix_detach_fds(&scm, skb);
2461 
2462         sk_peek_offset_bwd(sk, skb->len);
2463     } else {
2464         /* It is questionable: on PEEK we could:
2465            - not return fds: good, but too simple 8)
2466            - return fds, and not return them on read (old strategy,
2467              apparently wrong)
2468            - clone fds (chosen for now, as the most universal
2469              solution)
2470 
2471            POSIX 1003.1g does not actually define this clearly
2472            at all.  POSIX 1003.1g doesn't define a lot of things
2473            clearly, however!
2474 
2475         */
2476 
2477         sk_peek_offset_fwd(sk, size);
2478 
2479         if (UNIXCB(skb).fp)
2480             unix_peek_fds(&scm, skb);
2481     }
2482     err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2483 
2484     scm_recv(sock, msg, &scm, flags);
2485 
2486 out_free:
2487     skb_free_datagram(sk, skb);
2488     mutex_unlock(&u->iolock);
2489 out:
2490     return err;
2491 }
2492 
2493 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2494                   int flags)
2495 {
2496     struct sock *sk = sock->sk;
2497 
2498 #ifdef CONFIG_BPF_SYSCALL
2499     const struct proto *prot = READ_ONCE(sk->sk_prot);
2500 
2501     if (prot != &unix_dgram_proto)
2502         return prot->recvmsg(sk, msg, size, flags, NULL);
2503 #endif
2504     return __unix_dgram_recvmsg(sk, msg, size, flags);
2505 }
2506 
2507 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2508 {
2509     int copied = 0;
2510 
2511     while (1) {
2512         struct unix_sock *u = unix_sk(sk);
2513         struct sk_buff *skb;
2514         int used, err;
2515 
2516         mutex_lock(&u->iolock);
2517         skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2518         mutex_unlock(&u->iolock);
2519         if (!skb)
2520             return err;
2521 
2522         used = recv_actor(sk, skb);
2523         if (used <= 0) {
2524             if (!copied)
2525                 copied = used;
2526             kfree_skb(skb);
2527             break;
2528         } else if (used <= skb->len) {
2529             copied += used;
2530         }
2531 
2532         kfree_skb(skb);
2533         break;
2534     }
2535 
2536     return copied;
2537 }
2538 
2539 /*
2540  *  Sleep until more data has arrived, but check for races.
2541  */
2542 static long unix_stream_data_wait(struct sock *sk, long timeo,
2543                   struct sk_buff *last, unsigned int last_len,
2544                   bool freezable)
2545 {
2546     struct sk_buff *tail;
2547     DEFINE_WAIT(wait);
2548 
2549     unix_state_lock(sk);
2550 
2551     for (;;) {
2552         prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2553 
2554         tail = skb_peek_tail(&sk->sk_receive_queue);
2555         if (tail != last ||
2556             (tail && tail->len != last_len) ||
2557             sk->sk_err ||
2558             (sk->sk_shutdown & RCV_SHUTDOWN) ||
2559             signal_pending(current) ||
2560             !timeo)
2561             break;
2562 
2563         sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2564         unix_state_unlock(sk);
2565         if (freezable)
2566             timeo = freezable_schedule_timeout(timeo);
2567         else
2568             timeo = schedule_timeout(timeo);
2569         unix_state_lock(sk);
2570 
2571         if (sock_flag(sk, SOCK_DEAD))
2572             break;
2573 
2574         sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2575     }
2576 
2577     finish_wait(sk_sleep(sk), &wait);
2578     unix_state_unlock(sk);
2579     return timeo;
2580 }
2581 
2582 static unsigned int unix_skb_len(const struct sk_buff *skb)
2583 {
2584     return skb->len - UNIXCB(skb).consumed;
2585 }
2586 
2587 struct unix_stream_read_state {
2588     int (*recv_actor)(struct sk_buff *, int, int,
2589               struct unix_stream_read_state *);
2590     struct socket *socket;
2591     struct msghdr *msg;
2592     struct pipe_inode_info *pipe;
2593     size_t size;
2594     int flags;
2595     unsigned int splice_flags;
2596 };
2597 
2598 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2599 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2600 {
2601     struct socket *sock = state->socket;
2602     struct sock *sk = sock->sk;
2603     struct unix_sock *u = unix_sk(sk);
2604     int chunk = 1;
2605     struct sk_buff *oob_skb;
2606 
2607     mutex_lock(&u->iolock);
2608     unix_state_lock(sk);
2609 
2610     if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2611         unix_state_unlock(sk);
2612         mutex_unlock(&u->iolock);
2613         return -EINVAL;
2614     }
2615 
2616     oob_skb = u->oob_skb;
2617 
2618     if (!(state->flags & MSG_PEEK))
2619         WRITE_ONCE(u->oob_skb, NULL);
2620 
2621     unix_state_unlock(sk);
2622 
2623     chunk = state->recv_actor(oob_skb, 0, chunk, state);
2624 
2625     if (!(state->flags & MSG_PEEK)) {
2626         UNIXCB(oob_skb).consumed += 1;
2627         kfree_skb(oob_skb);
2628     }
2629 
2630     mutex_unlock(&u->iolock);
2631 
2632     if (chunk < 0)
2633         return -EFAULT;
2634 
2635     state->msg->msg_flags |= MSG_OOB;
2636     return 1;
2637 }
2638 
2639 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2640                   int flags, int copied)
2641 {
2642     struct unix_sock *u = unix_sk(sk);
2643 
2644     if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2645         skb_unlink(skb, &sk->sk_receive_queue);
2646         consume_skb(skb);
2647         skb = NULL;
2648     } else {
2649         if (skb == u->oob_skb) {
2650             if (copied) {
2651                 skb = NULL;
2652             } else if (sock_flag(sk, SOCK_URGINLINE)) {
2653                 if (!(flags & MSG_PEEK)) {
2654                     WRITE_ONCE(u->oob_skb, NULL);
2655                     consume_skb(skb);
2656                 }
2657             } else if (!(flags & MSG_PEEK)) {
2658                 skb_unlink(skb, &sk->sk_receive_queue);
2659                 consume_skb(skb);
2660                 skb = skb_peek(&sk->sk_receive_queue);
2661             }
2662         }
2663     }
2664     return skb;
2665 }
2666 #endif
2667 
2668 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2669 {
2670     if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2671         return -ENOTCONN;
2672 
2673     return unix_read_skb(sk, recv_actor);
2674 }
2675 
2676 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2677                     bool freezable)
2678 {
2679     struct scm_cookie scm;
2680     struct socket *sock = state->socket;
2681     struct sock *sk = sock->sk;
2682     struct unix_sock *u = unix_sk(sk);
2683     int copied = 0;
2684     int flags = state->flags;
2685     int noblock = flags & MSG_DONTWAIT;
2686     bool check_creds = false;
2687     int target;
2688     int err = 0;
2689     long timeo;
2690     int skip;
2691     size_t size = state->size;
2692     unsigned int last_len;
2693 
2694     if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2695         err = -EINVAL;
2696         goto out;
2697     }
2698 
2699     if (unlikely(flags & MSG_OOB)) {
2700         err = -EOPNOTSUPP;
2701 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2702         err = unix_stream_recv_urg(state);
2703 #endif
2704         goto out;
2705     }
2706 
2707     target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2708     timeo = sock_rcvtimeo(sk, noblock);
2709 
2710     memset(&scm, 0, sizeof(scm));
2711 
2712     /* Lock the socket to prevent queue disordering
2713      * while we sleep in memcpy_to_msg()
2714      */
2715     mutex_lock(&u->iolock);
2716 
2717     skip = max(sk_peek_offset(sk, flags), 0);
2718 
2719     do {
2720         int chunk;
2721         bool drop_skb;
2722         struct sk_buff *skb, *last;
2723 
2724 redo:
2725         unix_state_lock(sk);
2726         if (sock_flag(sk, SOCK_DEAD)) {
2727             err = -ECONNRESET;
2728             goto unlock;
2729         }
2730         last = skb = skb_peek(&sk->sk_receive_queue);
2731         last_len = last ? last->len : 0;
2732 
2733 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2734         if (skb) {
2735             skb = manage_oob(skb, sk, flags, copied);
2736             if (!skb) {
2737                 unix_state_unlock(sk);
2738                 if (copied)
2739                     break;
2740                 goto redo;
2741             }
2742         }
2743 #endif
2744 again:
2745         if (skb == NULL) {
2746             if (copied >= target)
2747                 goto unlock;
2748 
2749             /*
2750              *  POSIX 1003.1g mandates this order.
2751              */
2752 
2753             err = sock_error(sk);
2754             if (err)
2755                 goto unlock;
2756             if (sk->sk_shutdown & RCV_SHUTDOWN)
2757                 goto unlock;
2758 
2759             unix_state_unlock(sk);
2760             if (!timeo) {
2761                 err = -EAGAIN;
2762                 break;
2763             }
2764 
2765             mutex_unlock(&u->iolock);
2766 
2767             timeo = unix_stream_data_wait(sk, timeo, last,
2768                               last_len, freezable);
2769 
2770             if (signal_pending(current)) {
2771                 err = sock_intr_errno(timeo);
2772                 scm_destroy(&scm);
2773                 goto out;
2774             }
2775 
2776             mutex_lock(&u->iolock);
2777             goto redo;
2778 unlock:
2779             unix_state_unlock(sk);
2780             break;
2781         }
2782 
2783         while (skip >= unix_skb_len(skb)) {
2784             skip -= unix_skb_len(skb);
2785             last = skb;
2786             last_len = skb->len;
2787             skb = skb_peek_next(skb, &sk->sk_receive_queue);
2788             if (!skb)
2789                 goto again;
2790         }
2791 
2792         unix_state_unlock(sk);
2793 
2794         if (check_creds) {
2795             /* Never glue messages from different writers */
2796             if (!unix_skb_scm_eq(skb, &scm))
2797                 break;
2798         } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2799             /* Copy credentials */
2800             scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2801             unix_set_secdata(&scm, skb);
2802             check_creds = true;
2803         }
2804 
2805         /* Copy address just once */
2806         if (state->msg && state->msg->msg_name) {
2807             DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2808                      state->msg->msg_name);
2809             unix_copy_addr(state->msg, skb->sk);
2810             sunaddr = NULL;
2811         }
2812 
2813         chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2814         skb_get(skb);
2815         chunk = state->recv_actor(skb, skip, chunk, state);
2816         drop_skb = !unix_skb_len(skb);
2817         /* skb is only safe to use if !drop_skb */
2818         consume_skb(skb);
2819         if (chunk < 0) {
2820             if (copied == 0)
2821                 copied = -EFAULT;
2822             break;
2823         }
2824         copied += chunk;
2825         size -= chunk;
2826 
2827         if (drop_skb) {
2828             /* the skb was touched by a concurrent reader;
2829              * we should not expect anything from this skb
2830              * anymore and must assume it invalid - we can be
2831              * sure it was dropped from the socket queue
2832              *
2833              * let's report a short read
2834              */
2835             err = 0;
2836             break;
2837         }
2838 
2839         /* Mark read part of skb as used */
2840         if (!(flags & MSG_PEEK)) {
2841             UNIXCB(skb).consumed += chunk;
2842 
2843             sk_peek_offset_bwd(sk, chunk);
2844 
2845             if (UNIXCB(skb).fp) {
2846                 scm_stat_del(sk, skb);
2847                 unix_detach_fds(&scm, skb);
2848             }
2849 
2850             if (unix_skb_len(skb))
2851                 break;
2852 
2853             skb_unlink(skb, &sk->sk_receive_queue);
2854             consume_skb(skb);
2855 
2856             if (scm.fp)
2857                 break;
2858         } else {
2859             /* It is questionable, see note in unix_dgram_recvmsg.
2860              */
2861             if (UNIXCB(skb).fp)
2862                 unix_peek_fds(&scm, skb);
2863 
2864             sk_peek_offset_fwd(sk, chunk);
2865 
2866             if (UNIXCB(skb).fp)
2867                 break;
2868 
2869             skip = 0;
2870             last = skb;
2871             last_len = skb->len;
2872             unix_state_lock(sk);
2873             skb = skb_peek_next(skb, &sk->sk_receive_queue);
2874             if (skb)
2875                 goto again;
2876             unix_state_unlock(sk);
2877             break;
2878         }
2879     } while (size);
2880 
2881     mutex_unlock(&u->iolock);
2882     if (state->msg)
2883         scm_recv(sock, state->msg, &scm, flags);
2884     else
2885         scm_destroy(&scm);
2886 out:
2887     return copied ? : err;
2888 }
2889 
2890 static int unix_stream_read_actor(struct sk_buff *skb,
2891                   int skip, int chunk,
2892                   struct unix_stream_read_state *state)
2893 {
2894     int ret;
2895 
2896     ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2897                     state->msg, chunk);
2898     return ret ?: chunk;
2899 }
2900 
2901 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2902               size_t size, int flags)
2903 {
2904     struct unix_stream_read_state state = {
2905         .recv_actor = unix_stream_read_actor,
2906         .socket = sk->sk_socket,
2907         .msg = msg,
2908         .size = size,
2909         .flags = flags
2910     };
2911 
2912     return unix_stream_read_generic(&state, true);
2913 }
2914 
2915 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2916                    size_t size, int flags)
2917 {
2918     struct unix_stream_read_state state = {
2919         .recv_actor = unix_stream_read_actor,
2920         .socket = sock,
2921         .msg = msg,
2922         .size = size,
2923         .flags = flags
2924     };
2925 
2926 #ifdef CONFIG_BPF_SYSCALL
2927     struct sock *sk = sock->sk;
2928     const struct proto *prot = READ_ONCE(sk->sk_prot);
2929 
2930     if (prot != &unix_stream_proto)
2931         return prot->recvmsg(sk, msg, size, flags, NULL);
2932 #endif
2933     return unix_stream_read_generic(&state, true);
2934 }
2935 
2936 static int unix_stream_splice_actor(struct sk_buff *skb,
2937                     int skip, int chunk,
2938                     struct unix_stream_read_state *state)
2939 {
2940     return skb_splice_bits(skb, state->socket->sk,
2941                    UNIXCB(skb).consumed + skip,
2942                    state->pipe, chunk, state->splice_flags);
2943 }
2944 
2945 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2946                        struct pipe_inode_info *pipe,
2947                        size_t size, unsigned int flags)
2948 {
2949     struct unix_stream_read_state state = {
2950         .recv_actor = unix_stream_splice_actor,
2951         .socket = sock,
2952         .pipe = pipe,
2953         .size = size,
2954         .splice_flags = flags,
2955     };
2956 
2957     if (unlikely(*ppos))
2958         return -ESPIPE;
2959 
2960     if (sock->file->f_flags & O_NONBLOCK ||
2961         flags & SPLICE_F_NONBLOCK)
2962         state.flags = MSG_DONTWAIT;
2963 
2964     return unix_stream_read_generic(&state, false);
2965 }
2966 
2967 static int unix_shutdown(struct socket *sock, int mode)
2968 {
2969     struct sock *sk = sock->sk;
2970     struct sock *other;
2971 
2972     if (mode < SHUT_RD || mode > SHUT_RDWR)
2973         return -EINVAL;
2974     /* This maps:
2975      * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2976      * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2977      * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2978      */
2979     ++mode;
2980 
2981     unix_state_lock(sk);
2982     sk->sk_shutdown |= mode;
2983     other = unix_peer(sk);
2984     if (other)
2985         sock_hold(other);
2986     unix_state_unlock(sk);
2987     sk->sk_state_change(sk);
2988 
2989     if (other &&
2990         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2991 
2992         int peer_mode = 0;
2993         const struct proto *prot = READ_ONCE(other->sk_prot);
2994 
2995         if (prot->unhash)
2996             prot->unhash(other);
2997         if (mode&RCV_SHUTDOWN)
2998             peer_mode |= SEND_SHUTDOWN;
2999         if (mode&SEND_SHUTDOWN)
3000             peer_mode |= RCV_SHUTDOWN;
3001         unix_state_lock(other);
3002         other->sk_shutdown |= peer_mode;
3003         unix_state_unlock(other);
3004         other->sk_state_change(other);
3005         if (peer_mode == SHUTDOWN_MASK)
3006             sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
3007         else if (peer_mode & RCV_SHUTDOWN)
3008             sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
3009     }
3010     if (other)
3011         sock_put(other);
3012 
3013     return 0;
3014 }
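/*
 * Editor's note: a minimal sketch (half_close is a hypothetical
 * helper, not part of this file) of the mode mapping above: a write
 * shutdown on one end is propagated as a read shutdown on the peer,
 * so the peer reads EOF once its queue drains.
 */
#if 0 /* illustrative userspace example */
#include <sys/socket.h>
#include <unistd.h>

static void half_close(int a, int b)
{
	char c;

	shutdown(a, SHUT_WR);	/* SEND_SHUTDOWN on a ... */
	read(b, &c, 1);		/* ... RCV_SHUTDOWN on b: returns 0 at EOF */
}
#endif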
3015 
3016 long unix_inq_len(struct sock *sk)
3017 {
3018     struct sk_buff *skb;
3019     long amount = 0;
3020 
3021     if (sk->sk_state == TCP_LISTEN)
3022         return -EINVAL;
3023 
3024     spin_lock(&sk->sk_receive_queue.lock);
3025     if (sk->sk_type == SOCK_STREAM ||
3026         sk->sk_type == SOCK_SEQPACKET) {
3027         skb_queue_walk(&sk->sk_receive_queue, skb)
3028             amount += unix_skb_len(skb);
3029     } else {
3030         skb = skb_peek(&sk->sk_receive_queue);
3031         if (skb)
3032             amount = skb->len;
3033     }
3034     spin_unlock(&sk->sk_receive_queue.lock);
3035 
3036     return amount;
3037 }
3038 EXPORT_SYMBOL_GPL(unix_inq_len);
3039 
3040 long unix_outq_len(struct sock *sk)
3041 {
3042     return sk_wmem_alloc_get(sk);
3043 }
3044 EXPORT_SYMBOL_GPL(unix_outq_len);
3045 
3046 static int unix_open_file(struct sock *sk)
3047 {
3048     struct path path;
3049     struct file *f;
3050     int fd;
3051 
3052     if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3053         return -EPERM;
3054 
3055     if (!smp_load_acquire(&unix_sk(sk)->addr))
3056         return -ENOENT;
3057 
3058     path = unix_sk(sk)->path;
3059     if (!path.dentry)
3060         return -ENOENT;
3061 
3062     path_get(&path);
3063 
3064     fd = get_unused_fd_flags(O_CLOEXEC);
3065     if (fd < 0)
3066         goto out;
3067 
3068     f = dentry_open(&path, O_PATH, current_cred());
3069     if (IS_ERR(f)) {
3070         put_unused_fd(fd);
3071         fd = PTR_ERR(f);
3072         goto out;
3073     }
3074 
3075     fd_install(fd, f);
3076 out:
3077     path_put(&path);
3078 
3079     return fd;
3080 }
3081 
3082 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3083 {
3084     struct sock *sk = sock->sk;
3085     long amount = 0;
3086     int err;
3087 
3088     switch (cmd) {
3089     case SIOCOUTQ:
3090         amount = unix_outq_len(sk);
3091         err = put_user(amount, (int __user *)arg);
3092         break;
3093     case SIOCINQ:
3094         amount = unix_inq_len(sk);
3095         if (amount < 0)
3096             err = amount;
3097         else
3098             err = put_user(amount, (int __user *)arg);
3099         break;
3100     case SIOCUNIXFILE:
3101         err = unix_open_file(sk);
3102         break;
3103 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3104     case SIOCATMARK:
3105         {
3106             struct sk_buff *skb;
3107             int answ = 0;
3108 
3109             skb = skb_peek(&sk->sk_receive_queue);
3110             if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3111                 answ = 1;
3112             err = put_user(answ, (int __user *)arg);
3113         }
3114         break;
3115 #endif
3116     default:
3117         err = -ENOIOCTLCMD;
3118         break;
3119     }
3120     return err;
3121 }
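/*
 * Editor's note: a hedged sketch (show_queue_sizes is a hypothetical
 * helper, not part of this file) exercising the ioctls handled above.
 * SIOCATMARK is only available with CONFIG_AF_UNIX_OOB.
 */
#if 0 /* illustrative userspace example */
#include <sys/ioctl.h>
#include <linux/sockios.h>
#include <stdio.h>

static void show_queue_sizes(int sock)
{
	int inq = 0, outq = 0, at_mark = 0;

	ioctl(sock, SIOCINQ, &inq);	/* bytes queued for reading */
	ioctl(sock, SIOCOUTQ, &outq);	/* bytes not yet read by the peer */
	ioctl(sock, SIOCATMARK, &at_mark);	/* is the next byte the OOB byte? */
	printf("inq=%d outq=%d atmark=%d\n", inq, outq, at_mark);
}
#endif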
3122 
3123 #ifdef CONFIG_COMPAT
3124 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3125 {
3126     return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3127 }
3128 #endif
3129 
3130 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3131 {
3132     struct sock *sk = sock->sk;
3133     __poll_t mask;
3134 
3135     sock_poll_wait(file, sock, wait);
3136     mask = 0;
3137 
3138     /* exceptional events? */
3139     if (sk->sk_err)
3140         mask |= EPOLLERR;
3141     if (sk->sk_shutdown == SHUTDOWN_MASK)
3142         mask |= EPOLLHUP;
3143     if (sk->sk_shutdown & RCV_SHUTDOWN)
3144         mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3145 
3146     /* readable? */
3147     if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3148         mask |= EPOLLIN | EPOLLRDNORM;
3149     if (sk_is_readable(sk))
3150         mask |= EPOLLIN | EPOLLRDNORM;
3151 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3152     if (READ_ONCE(unix_sk(sk)->oob_skb))
3153         mask |= EPOLLPRI;
3154 #endif
3155 
3156     /* Connection-based sockets need to check for termination and startup */
3157     if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3158         sk->sk_state == TCP_CLOSE)
3159         mask |= EPOLLHUP;
3160 
3161     /*
3162      * We set writable also when the other side has shut down the
3163      * connection.  This prevents stuck sockets.
3164      */
3165     if (unix_writable(sk))
3166         mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3167 
3168     return mask;
3169 }
3170 
3171 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3172                     poll_table *wait)
3173 {
3174     struct sock *sk = sock->sk, *other;
3175     unsigned int writable;
3176     __poll_t mask;
3177 
3178     sock_poll_wait(file, sock, wait);
3179     mask = 0;
3180 
3181     /* exceptional events? */
3182     if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
3183         mask |= EPOLLERR |
3184             (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3185 
3186     if (sk->sk_shutdown & RCV_SHUTDOWN)
3187         mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3188     if (sk->sk_shutdown == SHUTDOWN_MASK)
3189         mask |= EPOLLHUP;
3190 
3191     /* readable? */
3192     if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3193         mask |= EPOLLIN | EPOLLRDNORM;
3194     if (sk_is_readable(sk))
3195         mask |= EPOLLIN | EPOLLRDNORM;
3196 
3197     /* Connection-based sockets need to check for termination and startup */
3198     if (sk->sk_type == SOCK_SEQPACKET) {
3199         if (sk->sk_state == TCP_CLOSE)
3200             mask |= EPOLLHUP;
3201         /* connection hasn't started yet? */
3202         if (sk->sk_state == TCP_SYN_SENT)
3203             return mask;
3204     }
3205 
3206     /* No write status requested, avoid expensive OUT tests. */
3207     if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3208         return mask;
3209 
3210     writable = unix_writable(sk);
3211     if (writable) {
3212         unix_state_lock(sk);
3213 
3214         other = unix_peer(sk);
3215         if (other && unix_peer(other) != sk &&
3216             unix_recvq_full_lockless(other) &&
3217             unix_dgram_peer_wake_me(sk, other))
3218             writable = 0;
3219 
3220         unix_state_unlock(sk);
3221     }
3222 
3223     if (writable)
3224         mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3225     else
3226         sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3227 
3228     return mask;
3229 }
3230 
3231 #ifdef CONFIG_PROC_FS
3232 
3233 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3234 
3235 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3236 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3237 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
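/*
 * Editor's note: *pos packs (bucket, in-bucket offset) into a single
 * long: set_bucket_offset(b, o) == (b << BUCKET_SPACE) | o, and
 * get_bucket()/get_offset() recover b and o, e.g.
 * get_bucket(set_bucket_offset(3, 2)) == 3 and
 * get_offset(set_bucket_offset(3, 2)) == 2, provided o fits in
 * BUCKET_SPACE bits.
 */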
3238 
3239 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3240 {
3241     unsigned long offset = get_offset(*pos);
3242     unsigned long bucket = get_bucket(*pos);
3243     unsigned long count = 0;
3244     struct sock *sk;
3245 
3246     for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3247          sk; sk = sk_next(sk)) {
3248         if (++count == offset)
3249             break;
3250     }
3251 
3252     return sk;
3253 }
3254 
3255 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
3256 {
3257     unsigned long bucket = get_bucket(*pos);
3258     struct net *net = seq_file_net(seq);
3259     struct sock *sk;
3260 
3261     while (bucket < UNIX_HASH_SIZE) {
3262         spin_lock(&net->unx.table.locks[bucket]);
3263 
3264         sk = unix_from_bucket(seq, pos);
3265         if (sk)
3266             return sk;
3267 
3268         spin_unlock(&net->unx.table.locks[bucket]);
3269 
3270         *pos = set_bucket_offset(++bucket, 1);
3271     }
3272 
3273     return NULL;
3274 }
3275 
3276 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3277                   loff_t *pos)
3278 {
3279     unsigned long bucket = get_bucket(*pos);
3280 
3281     sk = sk_next(sk);
3282     if (sk)
3283         return sk;
3284 
3285 
3286     spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
3287 
3288     *pos = set_bucket_offset(++bucket, 1);
3289 
3290     return unix_get_first(seq, pos);
3291 }
3292 
3293 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3294 {
3295     if (!*pos)
3296         return SEQ_START_TOKEN;
3297 
3298     return unix_get_first(seq, pos);
3299 }
3300 
3301 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3302 {
3303     ++*pos;
3304 
3305     if (v == SEQ_START_TOKEN)
3306         return unix_get_first(seq, pos);
3307 
3308     return unix_get_next(seq, v, pos);
3309 }
3310 
3311 static void unix_seq_stop(struct seq_file *seq, void *v)
3312 {
3313     struct sock *sk = v;
3314 
3315     if (sk)
3316         spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
3317 }
3318 
3319 static int unix_seq_show(struct seq_file *seq, void *v)
3320 {
3321 
3322     if (v == SEQ_START_TOKEN)
3323         seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3324              "Inode Path\n");
3325     else {
3326         struct sock *s = v;
3327         struct unix_sock *u = unix_sk(s);
3328         unix_state_lock(s);
3329 
3330         seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3331             s,
3332             refcount_read(&s->sk_refcnt),
3333             0,
3334             s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3335             s->sk_type,
3336             s->sk_socket ?
3337             (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3338             (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3339             sock_i_ino(s));
3340 
3341         if (u->addr) {  /* under a hash table lock here */
3342             int i, len;
3343             seq_putc(seq, ' ');
3344 
3345             i = 0;
3346             len = u->addr->len -
3347                 offsetof(struct sockaddr_un, sun_path);
3348             if (u->addr->name->sun_path[0]) {
3349                 len--;
3350             } else {
3351                 seq_putc(seq, '@');
3352                 i++;
3353             }
3354             for ( ; i < len; i++)
3355                 seq_putc(seq, u->addr->name->sun_path[i] ?:
3356                      '@');
3357         }
3358         unix_state_unlock(s);
3359         seq_putc(seq, '\n');
3360     }
3361 
3362     return 0;
3363 }
3364 
3365 static const struct seq_operations unix_seq_ops = {
3366     .start  = unix_seq_start,
3367     .next   = unix_seq_next,
3368     .stop   = unix_seq_stop,
3369     .show   = unix_seq_show,
3370 };
3371 
3372 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
3373 struct bpf_unix_iter_state {
3374     struct seq_net_private p;
3375     unsigned int cur_sk;
3376     unsigned int end_sk;
3377     unsigned int max_sk;
3378     struct sock **batch;
3379     bool st_bucket_done;
3380 };
3381 
3382 struct bpf_iter__unix {
3383     __bpf_md_ptr(struct bpf_iter_meta *, meta);
3384     __bpf_md_ptr(struct unix_sock *, unix_sk);
3385     uid_t uid __aligned(8);
3386 };
3387 
3388 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3389                   struct unix_sock *unix_sk, uid_t uid)
3390 {
3391     struct bpf_iter__unix ctx;
3392 
3393     meta->seq_num--;  /* skip SEQ_START_TOKEN */
3394     ctx.meta = meta;
3395     ctx.unix_sk = unix_sk;
3396     ctx.uid = uid;
3397     return bpf_iter_run_prog(prog, &ctx);
3398 }
3399 
3400 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3401 {
3403     struct bpf_unix_iter_state *iter = seq->private;
3404     unsigned int expected = 1;
3405     struct sock *sk;
3406 
3407     sock_hold(start_sk);
3408     iter->batch[iter->end_sk++] = start_sk;
3409 
3410     for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3411         if (iter->end_sk < iter->max_sk) {
3412             sock_hold(sk);
3413             iter->batch[iter->end_sk++] = sk;
3414         }
3415 
3416         expected++;
3417     }
3418 
3419     spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3420 
3421     return expected;
3422 }
3423 
3424 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3425 {
3426     while (iter->cur_sk < iter->end_sk)
3427         sock_put(iter->batch[iter->cur_sk++]);
3428 }
3429 
3430 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3431                        unsigned int new_batch_sz)
3432 {
3433     struct sock **new_batch;
3434 
3435     new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3436                  GFP_USER | __GFP_NOWARN);
3437     if (!new_batch)
3438         return -ENOMEM;
3439 
3440     bpf_iter_unix_put_batch(iter);
3441     kvfree(iter->batch);
3442     iter->batch = new_batch;
3443     iter->max_sk = new_batch_sz;
3444 
3445     return 0;
3446 }
3447 
3448 static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3449                     loff_t *pos)
3450 {
3451     struct bpf_unix_iter_state *iter = seq->private;
3452     unsigned int expected;
3453     bool resized = false;
3454     struct sock *sk;
3455 
3456     if (iter->st_bucket_done)
3457         *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3458 
3459 again:
3460     /* Get a new batch */
3461     iter->cur_sk = 0;
3462     iter->end_sk = 0;
3463 
3464     sk = unix_get_first(seq, pos);
3465     if (!sk)
3466         return NULL; /* Done */
3467 
3468     expected = bpf_iter_unix_hold_batch(seq, sk);
3469 
3470     if (iter->end_sk == expected) {
3471         iter->st_bucket_done = true;
3472         return sk;
3473     }
3474 
3475     if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3476         resized = true;
3477         goto again;
3478     }
3479 
3480     return sk;
3481 }
3482 
3483 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3484 {
3485     if (!*pos)
3486         return SEQ_START_TOKEN;
3487 
3488     /* bpf iter does not support lseek, so it always
3489      * continues from where it was stop()-ped.
3490      */
3491     return bpf_iter_unix_batch(seq, pos);
3492 }
3493 
3494 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3495 {
3496     struct bpf_unix_iter_state *iter = seq->private;
3497     struct sock *sk;
3498 
3499     /* Whenever seq_next() is called, the iter->cur_sk is
3500      * done with seq_show(), so advance to the next sk in
3501      * the batch.
3502      */
3503     if (iter->cur_sk < iter->end_sk)
3504         sock_put(iter->batch[iter->cur_sk++]);
3505 
3506     ++*pos;
3507 
3508     if (iter->cur_sk < iter->end_sk)
3509         sk = iter->batch[iter->cur_sk];
3510     else
3511         sk = bpf_iter_unix_batch(seq, pos);
3512 
3513     return sk;
3514 }
3515 
3516 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3517 {
3518     struct bpf_iter_meta meta;
3519     struct bpf_prog *prog;
3520     struct sock *sk = v;
3521     uid_t uid;
3522     bool slow;
3523     int ret;
3524 
3525     if (v == SEQ_START_TOKEN)
3526         return 0;
3527 
3528     slow = lock_sock_fast(sk);
3529 
3530     if (unlikely(sk_unhashed(sk))) {
3531         ret = SEQ_SKIP;
3532         goto unlock;
3533     }
3534 
3535     uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3536     meta.seq = seq;
3537     prog = bpf_iter_get_info(&meta, false);
3538     ret = unix_prog_seq_show(prog, &meta, v, uid);
3539 unlock:
3540     unlock_sock_fast(sk, slow);
3541     return ret;
3542 }
3543 
static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
    struct bpf_unix_iter_state *iter = seq->private;
    struct bpf_iter_meta meta;
    struct bpf_prog *prog;

    if (!v) {
        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, true);
        if (prog)
            (void)unix_prog_seq_show(prog, &meta, v, 0);
    }

    if (iter->cur_sk < iter->end_sk)
        bpf_iter_unix_put_batch(iter);
}

static const struct seq_operations bpf_iter_unix_seq_ops = {
    .start  = bpf_iter_unix_seq_start,
    .next   = bpf_iter_unix_seq_next,
    .stop   = bpf_iter_unix_seq_stop,
    .show   = bpf_iter_unix_seq_show,
};
#endif
#endif

static const struct net_proto_family unix_family_ops = {
    .family = PF_UNIX,
    .create = unix_create,
    .owner  = THIS_MODULE,
};

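/* Per-netns setup: the max_dgram_qlen sysctl, the /proc/net/unix seq
 * file, and this namespace's hash table of buckets with their
 * per-bucket locks.
 */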
static int __net_init unix_net_init(struct net *net)
{
    int i;

    net->unx.sysctl_max_dgram_qlen = 10;
    if (unix_sysctl_register(net))
        goto out;

#ifdef CONFIG_PROC_FS
    if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
                 sizeof(struct seq_net_private)))
        goto err_sysctl;
#endif

    net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
                          sizeof(spinlock_t), GFP_KERNEL);
    if (!net->unx.table.locks)
        goto err_proc;

    net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
                        sizeof(struct hlist_head),
                        GFP_KERNEL);
    if (!net->unx.table.buckets)
        goto free_locks;

    for (i = 0; i < UNIX_HASH_SIZE; i++) {
        spin_lock_init(&net->unx.table.locks[i]);
        INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
    }

    return 0;

free_locks:
    kvfree(net->unx.table.locks);
err_proc:
#ifdef CONFIG_PROC_FS
    remove_proc_entry("unix", net->proc_net);
err_sysctl:
#endif
    unix_sysctl_unregister(net);
out:
    return -ENOMEM;
}

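/* Tear down everything unix_net_init() set up for this namespace. */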
static void __net_exit unix_net_exit(struct net *net)
{
    kvfree(net->unx.table.buckets);
    kvfree(net->unx.table.locks);
    unix_sysctl_unregister(net);
    remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
    .init = unix_net_init,
    .exit = unix_net_exit,
};

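/* The "unix" BPF iterator is only available when AF_UNIX is built in
 * (with BPF and procfs enabled); registering the iterator target from
 * a module is not supported.
 */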
#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
             struct unix_sock *unix_sk, uid_t uid)

#define INIT_BATCH_SZ 16

static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
{
    struct bpf_unix_iter_state *iter = priv_data;
    int err;

    err = bpf_iter_init_seq_net(priv_data, aux);
    if (err)
        return err;

    err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
    if (err) {
        bpf_iter_fini_seq_net(priv_data);
        return err;
    }

    return 0;
}

static void bpf_iter_fini_unix(void *priv_data)
{
    struct bpf_unix_iter_state *iter = priv_data;

    bpf_iter_fini_seq_net(priv_data);
    kvfree(iter->batch);
}

static const struct bpf_iter_seq_info unix_seq_info = {
    .seq_ops        = &bpf_iter_unix_seq_ops,
    .init_seq_private   = bpf_iter_init_unix,
    .fini_seq_private   = bpf_iter_fini_unix,
    .seq_priv_size      = sizeof(struct bpf_unix_iter_state),
};

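/* Allow bpf_setsockopt() and bpf_getsockopt() from a program attached
 * to this iterator, e.g. to tweak each socket as it is visited.
 */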
static const struct bpf_func_proto *
bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
                 const struct bpf_prog *prog)
{
    switch (func_id) {
    case BPF_FUNC_setsockopt:
        return &bpf_sk_setsockopt_proto;
    case BPF_FUNC_getsockopt:
        return &bpf_sk_getsockopt_proto;
    default:
        return NULL;
    }
}

static struct bpf_iter_reg unix_reg_info = {
    .target         = "unix",
    .ctx_arg_info_size  = 1,
    .ctx_arg_info       = {
        { offsetof(struct bpf_iter__unix, unix_sk),
          PTR_TO_BTF_ID_OR_NULL },
    },
    .get_func_proto     = bpf_iter_unix_get_func_proto,
    .seq_info       = &unix_seq_info,
};

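/* Register the "unix" iterator target with the BPF iterator
 * infrastructure.  A rough sketch of a consumer, not part of this
 * file ("dump_unix" and "prog.o" are placeholder names, and the
 * usual vmlinux.h + libbpf setup from the kernel selftests is
 * assumed); note that ctx->unix_sk is NULL on the final call made
 * from seq_stop():
 *
 *    SEC("iter/unix")
 *    int dump_unix(struct bpf_iter__unix *ctx)
 *    {
 *        struct unix_sock *unix_sk = ctx->unix_sk;
 *
 *        if (!unix_sk)
 *            return 0;
 *        BPF_SEQ_PRINTF(ctx->meta->seq, "state %u\n",
 *                   unix_sk->sk.sk_state);
 *        return 0;
 *    }
 *
 * Once loaded, the program can be pinned with
 * "bpftool iter pin prog.o /sys/fs/bpf/unix" and read with a plain cat.
 */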
static void __init bpf_iter_register(void)
{
    unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
    if (bpf_iter_reg_target(&unix_reg_info))
        pr_warn("Warning: could not register bpf iterator unix\n");
}
#endif

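/* Module init: check at build time that unix_skb_parms still fits in
 * skb->cb, initialise the pathname-socket hash, register the dgram
 * and stream proto slabs, the socket family, the pernet ops and the
 * BPF iterator.
 */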
static int __init af_unix_init(void)
{
    int i, rc = -1;

    BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));

    for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
        spin_lock_init(&bsd_socket_locks[i]);
        INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
    }

    rc = proto_register(&unix_dgram_proto, 1);
    if (rc != 0) {
        pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
        goto out;
    }

    rc = proto_register(&unix_stream_proto, 1);
    if (rc != 0) {
        pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
        proto_unregister(&unix_dgram_proto);
        goto out;
    }

    sock_register(&unix_family_ops);
    register_pernet_subsys(&unix_net_ops);
    unix_bpf_build_proto();

#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
    bpf_iter_register();
#endif

out:
    return rc;
}

static void __exit af_unix_exit(void)
{
    sock_unregister(PF_UNIX);
    proto_unregister(&unix_dgram_proto);
    proto_unregister(&unix_stream_proto);
    unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket. But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);