// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs were introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
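
/* Example (user-space sketch, not part of this file): binding an
 * "abstract" socket as described above.  The name begins with a NUL
 * byte, addr_len covers only the bytes actually used, and the name
 * never touches the filesystem namespace:
 *
 *	struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *	socklen_t len = offsetof(struct sockaddr_un, sun_path) + 5;
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	memcpy(addr.sun_path, "\0demo", 5);
 *	if (bind(fd, (struct sockaddr *)&addr, len) == -1)
 *		perror("bind");
 */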

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bpf-cgroup.h>
#include <linux/btf_ids.h>
#include <linux/dcache.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/filter.h>
#include <linux/fs.h>
#include <linux/fs_struct.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/net.h>
#include <linux/pidfs.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/sched/signal.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/splice.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <net/af_unix.h>
#include <net/net_namespace.h>
#include <net/scm.h>
#include <net/tcp_states.h>
#include <uapi/linux/sockios.h>
#include <uapi/linux/termios.h>

#include "af_unix.h"

static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 *    hash table is protected with spinlock.
 *    each socket state is protected by separate spinlock.
 */
#ifdef CONFIG_PROVE_LOCKING
#define cmp_ptr(l, r)	(((l) > (r)) - ((l) < (r)))

static int unix_table_lock_cmp_fn(const struct lockdep_map *a,
				  const struct lockdep_map *b)
{
	return cmp_ptr(a, b);
}

static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
				  const struct lockdep_map *_b)
{
	const struct unix_sock *a, *b;

	a = container_of(_a, struct unix_sock, lock.dep_map);
	b = container_of(_b, struct unix_sock, lock.dep_map);

	if (a->sk.sk_state == TCP_LISTEN) {
		/* unix_stream_connect(): Before the 2nd unix_state_lock(),
		 *
		 *   1. a is TCP_LISTEN.
		 *   2. b is not a.
		 *   3. concurrent connect(b -> a) must fail.
		 *
		 * Except for 2. & 3., the b's state can be any possible
		 * value due to concurrent connect() or listen().
		 *
		 * 2. is detected in debug_spin_lock_before(), and 3. cannot
		 * be expressed as lock_cmp_fn.
		 */
		switch (b->sk.sk_state) {
		case TCP_CLOSE:
		case TCP_ESTABLISHED:
		case TCP_LISTEN:
			return -1;
		default:
			/* Invalid case. */
			return 0;
		}
	}

	/* Should never happen. Just to be symmetric. */
	if (b->sk.sk_state == TCP_LISTEN) {
		switch (a->sk.sk_state) {
		case TCP_CLOSE:
		case TCP_ESTABLISHED:
			return 1;
		default:
			return 0;
		}
	}

	/* unix_state_double_lock(): ascending address order. */
	return cmp_ptr(a, b);
}

static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a,
				  const struct lockdep_map *_b)
{
	const struct sock *a, *b;

	a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map);
	b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map);

	/* unix_collect_skb(): listener -> embryo order. */
	if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a)
		return -1;

	/* Should never happen. Just to be symmetric. */
	if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b)
		return 1;

	return 0;
}
#endif

static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return hash & UNIX_HASH_MOD;
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & UNIX_HASH_MOD;
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}

static void unix_table_double_lock(struct net *net,
				   unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_lock(&net->unx.table.locks[hash1]);
		return;
	}

	if (hash1 > hash2)
		swap(hash1, hash2);

	spin_lock(&net->unx.table.locks[hash1]);
	spin_lock(&net->unx.table.locks[hash2]);
}

static void unix_table_double_unlock(struct net *net,
				     unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_unlock(&net->unx.table.locks[hash1]);
		return;
	}

	spin_unlock(&net->unx.table.locks[hash1]);
	spin_unlock(&net->unx.table.locks[hash2]);
}

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return !unix_peer(osk) || unix_peer(osk) == sk;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
					     int addr_len)
{
	struct unix_address *addr;

	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		return NULL;

	refcount_set(&addr->refcnt, 1);
	addr->len = addr_len;
	memcpy(addr->name, sunaddr, addr_len);

	return addr;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 * Check unix socket name:
 *	- should not be zero length.
 *	- if started by not zero, should be NULL terminated (FS object)
 *	- if started by zero, it is abstract name.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
	    addr_len > sizeof(*sunaddr))
		return -EINVAL;

	if (sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	return 0;
}

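/* Example (user-space sketch, not part of this file): for a filesystem
 * binding the rules above mean the path must be NUL terminated and the
 * address length is usually computed as ("fd" is a placeholder):
 *
 *	struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *	socklen_t len;
 *
 *	strncpy(addr.sun_path, "/tmp/demo.sock", sizeof(addr.sun_path) - 1);
 *	len = offsetof(struct sockaddr_un, sun_path) + strlen(addr.sun_path) + 1;
 *	bind(fd, (struct sockaddr *)&addr, len);
 */
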
static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
	short offset = offsetof(struct sockaddr_storage, __data);

	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));

	/* This may look like an off by one error but it is a bit more
	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
	 * sun_path[108] doesn't as such exist.  However in kernel space
	 * we are guaranteed that it is a valid memory location in our
	 * kernel address buffer because syscall functions always pass
	 * a pointer of struct sockaddr_storage which has a bigger buffer
	 * than 108.  Also, we must terminate sun_path for strlen() in
	 * getname_kernel().
	 */
	addr->__data[addr_len - offset] = 0;

	/* Don't pass sunaddr->sun_path to strlen().  Otherwise, 108 will
	 * cause panic if CONFIG_FORTIFY_SOURCE=y.  Let __fortify_strlen()
	 * know the actual buffer.
	 */
	return strlen(addr->__data) + offset + 1;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct net *net, struct sock *sk)
{
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
}

static void __unix_set_addr_hash(struct net *net, struct sock *sk,
				 struct unix_address *addr, unsigned int hash)
{
	__unix_remove_socket(sk);
	smp_store_release(&unix_sk(sk)->addr, addr);

	sk->sk_hash = hash;
	__unix_insert_socket(net, sk);
}

static void unix_remove_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_remove_socket(sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_insert_socket(net, sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_bsd_socket(struct sock *sk)
{
	spin_lock(&bsd_socket_locks[sk->sk_hash]);
	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
}

static void unix_remove_bsd_socket(struct sock *sk)
{
	if (!hlist_unhashed(&sk->sk_bind_node)) {
		spin_lock(&bsd_socket_locks[sk->sk_hash]);
		__sk_del_bind_node(sk);
		spin_unlock(&bsd_socket_locks[sk->sk_hash]);

		sk_node_init(&sk->sk_bind_node);
	}
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &net->unx.table.buckets[hash]) {
		struct unix_sock *u = unix_sk(s);

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, unsigned int hash)
{
	struct sock *s;

	spin_lock(&net->unx.table.locks[hash]);
	s = __unix_find_socket_byname(net, sunname, len, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&net->unx.table.locks[hash]);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	unsigned int hash = unix_bsd_hash(i);
	struct sock *s;

	spin_lock(&bsd_socket_locks[hash]);
	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			spin_unlock(&bsd_socket_locks[hash]);
			return s;
		}
	}
	spin_unlock(&bsd_socket_locks[hash]);
	return NULL;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */

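/* Example (user-space sketch, not part of this file): this second
 * writeability condition is what a client connect()ed to a busy
 * receiver observes; poll() reports POLLOUT only once the peer's
 * receive queue has drained below its limit ("srv", "buf" and the
 * length variables are placeholders):
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *	connect(fd, (struct sockaddr *)&srv, srv_len);
 *	if (poll(&pfd, 1, -1) == 1 && (pfd.revents & POLLOUT))
 *		send(fd, buf, buf_len, 0);
 */
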
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk, unsigned char state)
{
	return state != TCP_LISTEN &&
		(refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge_reason(&sk->sk_receive_queue,
				       SKB_DROP_REASON_UNIX_DISCONNECT);

		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not do this when
		 * the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			WRITE_ONCE(other->sk_err, ECONNRESET);
			sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge_reason(&sk->sk_receive_queue, SKB_DROP_REASON_SOCKET_CLOSE);

	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (sk->sk_peer_pid)
		pidfs_put_pid(sk->sk_peer_pid);

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		 atomic_long_read(&unix_nr_socks));
#endif
}

static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct sock *skpair;
	struct sk_buff *skb;
	struct path path;
	int state;

	unix_remove_socket(sock_net(sk), sk);
	unix_remove_bsd_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	WRITE_ONCE(sk->sk_state, TCP_CLOSE);

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	u->oob_skb = NULL;
#endif

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
			if (skb && !unix_skb_len(skb))
				skb = skb_peek_next(skb, &sk->sk_receive_queue);
#endif
			unix_state_lock(skpair);
			/* No more writes */
			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
			if (skb || embrion)
				WRITE_ONCE(skpair->sk_err, ECONNRESET);
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);

		/* passed fds are erased in the kfree_skb hook */
		kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 * What does the above comment talk about? --ANK(980817)
	 */

	if (READ_ONCE(unix_tot_inflight))
		unix_gc();		/* Garbage collect fds */
}

struct unix_peercred {
	struct pid *peer_pid;
	const struct cred *peer_cred;
};

static inline int prepare_peercred(struct unix_peercred *peercred)
{
	struct pid *pid;
	int err;

	pid = task_tgid(current);
	err = pidfs_register_pid(pid);
	if (likely(!err)) {
		peercred->peer_pid = get_pid(pid);
		peercred->peer_cred = get_current_cred();
	}
	return err;
}

static void drop_peercred(struct unix_peercred *peercred)
{
	const struct cred *cred = NULL;
	struct pid *pid = NULL;

	might_sleep();

	swap(peercred->peer_pid, pid);
	swap(peercred->peer_cred, cred);

	pidfs_put_pid(pid);
	put_pid(pid);
	put_cred(cred);
}

static inline void init_peercred(struct sock *sk,
				 const struct unix_peercred *peercred)
{
	sk->sk_peer_pid = peercred->peer_pid;
	sk->sk_peer_cred = peercred->peer_cred;
}

static void update_peercred(struct sock *sk, struct unix_peercred *peercred)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	spin_lock(&sk->sk_peer_lock);
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	init_peercred(sk, peercred);
	spin_unlock(&sk->sk_peer_lock);

	peercred->peer_pid = old_pid;
	peercred->peer_cred = old_cred;
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	lockdep_assert_held(&unix_sk(peersk)->lock);

	spin_lock(&sk->sk_peer_lock);
	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
	pidfs_get_pid(sk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
	spin_unlock(&sk->sk_peer_lock);
}

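/* Example (user-space sketch, not part of this file): the peer
 * credentials maintained above are what SO_PEERCRED reports on a
 * connected or listening socket (struct ucred needs _GNU_SOURCE):
 *
 *	struct ucred cr;
 *	socklen_t len = sizeof(cr);
 *
 *	if (!getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &len))
 *		printf("pid=%d uid=%d gid=%d\n", cr.pid, cr.uid, cr.gid);
 */
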
static bool unix_may_passcred(const struct sock *sk)
{
	return sk->sk_scm_credentials || sk->sk_scm_pidfd;
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct unix_peercred peercred = {};

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!READ_ONCE(u->addr))
		goto out;	/* No listens on an unbound socket */
	err = prepare_peercred(&peercred);
	if (err)
		goto out;
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog = backlog;
	WRITE_ONCE(sk->sk_state, TCP_LISTEN);

	/* set credentials so connect can copy them */
	update_peercred(sk, &peercred);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	drop_peercred(&peercred);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

#ifdef CONFIG_PROC_FS
static int unix_count_nr_fds(struct sock *sk)
{
	struct sk_buff *skb;
	struct unix_sock *u;
	int nr_fds = 0;

	spin_lock(&sk->sk_receive_queue.lock);
	skb = skb_peek(&sk->sk_receive_queue);
	while (skb) {
		u = unix_sk(skb->sk);
		nr_fds += atomic_read(&u->scm_stat.nr_fds);
		skb = skb_peek_next(skb, &sk->sk_receive_queue);
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return nr_fds;
}

static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	unsigned char s_state;
	struct unix_sock *u;
	int nr_fds = 0;

	if (sk) {
		s_state = READ_ONCE(sk->sk_state);
		u = unix_sk(sk);

		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
		 * SOCK_DGRAM is ordinary. So, no lock is needed.
		 */
		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
			nr_fds = atomic_read(&u->scm_stat.nr_fds);
		else if (s_state == TCP_LISTEN)
			nr_fds = unix_count_nr_fds(sk);

		seq_printf(m, "scm_fds: %u\n", nr_fds);
	}
}
#else
#define unix_show_fdinfo NULL
#endif

static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.read_skb =	unix_stream_read_skb,
	.mmap =		sock_no_mmap,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	sk_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_dgram_sendmsg,
	.read_skb =	unix_read_skb,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.set_peek_off =	sk_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.set_peek_off =	sk_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}

static bool unix_bpf_bypass_getsockopt(int level, int optname)
{
	if (level == SOL_SOCKET) {
		switch (optname) {
		case SO_PEERPIDFD:
			return true;
		default:
			return false;
		}
	}

	return false;
}

struct proto unix_dgram_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
#endif
};

struct proto unix_stream_proto = {
	.name			= "UNIX-STREAM",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
#endif
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
	struct unix_sock *u;
	struct sock *sk;
	int err;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
		err = -ENFILE;
		goto err;
	}

	if (type == SOCK_STREAM)
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
	else /* dgram and seqpacket */
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);

	if (!sk) {
		err = -ENOMEM;
		goto err;
	}

	sock_init_data(sock, sk);

	sk->sk_scm_rights	= 1;
	sk->sk_hash		= unix_unbound_hash(sk);
	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= READ_ONCE(net->unx.sysctl_max_dgram_qlen);
	sk->sk_destruct		= unix_sock_destructor;
	lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL);

	u = unix_sk(sk);
	u->listener = NULL;
	u->vertex = NULL;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_unbound_socket(net, sk);

	sock_prot_inuse_add(net, sk->sk_prot, 1);

	return sk;

err:
	atomic_long_dec(&unix_nr_socks);
	return ERR_PTR(err);
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	struct sock *sk;

	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 * nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		fallthrough;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	sk = unix_create1(net, sock, kern, sock->type);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	return 0;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sk->sk_prot->close(sk, 0);
	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type, int flags)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);

	if (flags & SOCK_COREDUMP) {
		const struct cred *cred;
		struct cred *kcred;
		struct path root;

		kcred = prepare_kernel_cred(&init_task);
		if (!kcred) {
			err = -ENOMEM;
			goto fail;
		}

		task_lock(&init_task);
		get_fs_root(init_task.fs, &root);
		task_unlock(&init_task);

		cred = override_creds(kcred);
		err = vfs_path_lookup(root.dentry, root.mnt, sunaddr->sun_path,
				      LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS |
				      LOOKUP_NO_MAGICLINKS, &path);
		put_cred(revert_creds(cred));
		path_put(&root);
		if (err)
			goto fail;
	} else {
		err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;

		err = path_permission(&path, MAY_WRITE);
		if (err)
			goto path_put;
	}

	err = -ECONNREFUSED;
	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto path_put;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto path_put;

	err = -EPROTOTYPE;
	if (sk->sk_type == type)
		touch_atime(&path);
	else
		goto sock_put;

	path_put(&path);

	return sk;

sock_put:
	sock_put(sk);
path_put:
	path_put(&path);
fail:
	return ERR_PTR(err);
}

static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct dentry *dentry;
	struct sock *sk;

	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	dentry = unix_sk(sk)->path.dentry;
	if (dentry)
		touch_atime(&unix_sk(sk)->path);

	return sk;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunaddr,
				    int addr_len, int type, int flags)
{
	struct sock *sk;

	if (sunaddr->sun_path[0])
		sk = unix_find_bsd(sunaddr, addr_len, type, flags);
	else
		sk = unix_find_abstract(net, sunaddr, addr_len, type);

	return sk;
}

static int unix_autobind(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);
	unsigned int new_hash, old_hash;
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	u32 lastnum, ordernum;
	int err;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) +
		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

	old_hash = sk->sk_hash;
	ordernum = get_random_u32();
	lastnum = ordernum & 0xFFFFF;
retry:
	ordernum = (ordernum + 1) & 0xFFFFF;
	sprintf(addr->name->sun_path + 1, "%05x", ordernum);

	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
		unix_table_double_unlock(net, old_hash, new_hash);

		/* __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();

		if (ordernum == lastnum) {
			/* Give up if all names seem to be in use. */
			err = -ENOSPC;
			unix_release_addr(addr);
			goto out;
		}

		goto retry;
	}

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}

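/* Example (user-space sketch, not part of this file): autobind is
 * triggered by bind()ing with only sun_family, i.e. addr_len ==
 * offsetof(struct sockaddr_un, sun_path); the kernel then picks a
 * unique abstract name of five hex digits, visible via getsockname():
 *
 *	struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *	socklen_t len = sizeof(addr);
 *
 *	bind(fd, (struct sockaddr *)&addr, sizeof(sa_family_t));
 *	getsockname(fd, (struct sockaddr *)&addr, &len);
 *	[addr.sun_path now holds '\0' followed by five hex digits]
 */
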
static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
			 int addr_len)
{
	umode_t mode = S_IFSOCK |
	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
	struct unix_sock *u = unix_sk(sk);
	unsigned int new_hash, old_hash;
	struct net *net = sock_net(sk);
	struct mnt_idmap *idmap;
	struct unix_address *addr;
	struct dentry *dentry;
	struct path parent;
	int err;

	addr_len = unix_mkname_bsd(sunaddr, addr_len);
	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out;
	}

	/*
	 * All right, let's create it.
	 */
	idmap = mnt_idmap(parent.mnt);
	err = security_path_mknod(&parent, dentry, mode, 0);
	if (!err)
		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
	if (err)
		goto out_path;
	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_unlink;
	if (u->addr)
		goto out_unlock;

	old_hash = sk->sk_hash;
	new_hash = unix_bsd_hash(d_backing_inode(dentry));
	unix_table_double_lock(net, old_hash, new_hash);
	u->path.mnt = mntget(parent.mnt);
	u->path.dentry = dget(dentry);
	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	unix_insert_bsd_socket(sk);
	mutex_unlock(&u->bindlock);
	done_path_create(&parent, dentry);
	return 0;

out_unlock:
	mutex_unlock(&u->bindlock);
	err = -EINVAL;
out_unlink:
	/* failed after successful mknod?  unlink what we'd created... */
	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
out_path:
	done_path_create(&parent, dentry);
out:
	unix_release_addr(addr);
	return err == -EEXIST ? -EADDRINUSE : err;
}

static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
			      int addr_len)
{
	struct unix_sock *u = unix_sk(sk);
	unsigned int new_hash, old_hash;
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	int err;

	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out;

	if (u->addr) {
		err = -EINVAL;
		goto out_mutex;
	}

	old_hash = sk->sk_hash;
	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
		goto out_spin;

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	mutex_unlock(&u->bindlock);
	return 0;

out_spin:
	unix_table_double_unlock(net, old_hash, new_hash);
	err = -EADDRINUSE;
out_mutex:
	mutex_unlock(&u->bindlock);
out:
	unix_release_addr(addr);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	int err;

	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
	    sunaddr->sun_family == AF_UNIX)
		return unix_autobind(sk);

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		return err;

	if (sunaddr->sun_path[0])
		err = unix_bind_bsd(sk, sunaddr, addr_len);
	else
		err = unix_bind_abstract(sk, sunaddr, addr_len);

	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 > sk2)
		swap(sk1, sk2);

	unix_state_lock(sk1);
	unix_state_lock(sk2);
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *sk = sock->sk;
	struct sock *other;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_validate_addr(sunaddr, alen);
		if (err)
			goto out;

		err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
		if (err)
			goto out;

		if (unix_may_passcred(sk) && !READ_ONCE(unix_sk(sk)->addr)) {
			err = unix_autobind(sk);
			if (err)
				goto out;
		}

restart:
		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type, 0);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			goto out;
		}

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

		WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
		WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);

		unix_peer(sk) = other;
		if (!other)
			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer) {
			unix_dgram_disconnected(sk, old_peer);

			unix_state_lock(old_peer);
			if (!unix_peer(old_peer))
				WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
			unix_state_unlock(old_peer);
		}

		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}

	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

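/* Example (user-space sketch, not part of this file): as handled
 * above, a connected SOCK_DGRAM socket is dissolved by connect()ing
 * again with sa_family == AF_UNSPEC:
 *
 *	struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *
 *	connect(fd, &sa, sizeof(sa));	[breaks the peer association]
 */
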
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full_lockless(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

1602static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1603 int addr_len, int flags)
1604{
e27dfcea 1605 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
340c3d33 1606 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1da177e4 1607 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
fd0a109a 1608 struct unix_peercred peercred = {};
340c3d33 1609 struct net *net = sock_net(sk);
1da177e4 1610 struct sk_buff *skb = NULL;
1ca27e0c 1611 unsigned char state;
1da177e4 1612 long timeo;
340c3d33 1613 int err;
1da177e4 1614
b8a58aa6
KI
1615 err = unix_validate_addr(sunaddr, addr_len);
1616 if (err)
1617 goto out;
1618
859051dd
DDM
1619 err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
1620 if (err)
1621 goto out;
1622
350d4546 1623 if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
f7ed31f4
KI
1624 err = unix_autobind(sk);
1625 if (err)
1626 goto out;
1627 }
1da177e4
LT
1628
1629 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1630
1631 /* First of all allocate resources.
e26ee0a7
KI
1632 * If we will make it after state is locked,
1633 * we will have to recheck all again in any case.
1da177e4
LT
1634 */
1635
1da177e4 1636 /* create new sock for complete connection */
340c3d33 1637 newsk = unix_create1(net, NULL, 0, sock->type);
f4bd73b5
KI
1638 if (IS_ERR(newsk)) {
1639 err = PTR_ERR(newsk);
1da177e4 1640 goto out;
f4bd73b5
KI
1641 }
1642
fd0a109a
CB
1643 err = prepare_peercred(&peercred);
1644 if (err)
1645 goto out;
1646
1da177e4
LT
1647 /* Allocate skb for sending to listening sock */
1648 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
34c899af
KI
1649 if (!skb) {
1650 err = -ENOMEM;
e26ee0a7 1651 goto out_free_sk;
34c899af 1652 }
1da177e4
LT
1653
1654restart:
1655 /* Find listening sock. */
a9194f88 1656 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, flags);
aed26f55
KI
1657 if (IS_ERR(other)) {
1658 err = PTR_ERR(other);
e26ee0a7 1659 goto out_free_skb;
aed26f55 1660 }
1da177e4 1661
1c92b4e5 1662 unix_state_lock(other);
1da177e4
LT
1663
1664 /* Apparently VFS overslept socket death. Retry. */
1665 if (sock_flag(other, SOCK_DEAD)) {
1c92b4e5 1666 unix_state_unlock(other);
1da177e4
LT
1667 sock_put(other);
1668 goto restart;
1669 }
1670
34c899af
KI
1671 if (other->sk_state != TCP_LISTEN ||
1672 other->sk_shutdown & RCV_SHUTDOWN) {
1673 err = -ECONNREFUSED;
77238f2b 1674 goto out_unlock;
34c899af 1675 }
1da177e4 1676
45d872f0 1677 if (unix_recvq_full_lockless(other)) {
34c899af
KI
1678 if (!timeo) {
1679 err = -EAGAIN;
1da177e4 1680 goto out_unlock;
34c899af 1681 }
1da177e4
LT
1682
1683 timeo = unix_wait_for_peer(other, timeo);
e26ee0a7 1684 sock_put(other);
1da177e4
LT
1685
1686 err = sock_intr_errno(timeo);
1687 if (signal_pending(current))
e26ee0a7
KI
1688 goto out_free_skb;
1689
1da177e4 1690 goto restart;
ac7bfa62 1691 }
1da177e4 1692
1ca27e0c
KI
1693 /* self connect and simultaneous connect are eliminated
1694 * by rejecting TCP_LISTEN socket to avoid deadlock.
1da177e4 1695 */
1ca27e0c
KI
1696 state = READ_ONCE(sk->sk_state);
1697 if (unlikely(state != TCP_CLOSE)) {
1698 err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1da177e4
LT
1699 goto out_unlock;
1700 }
1701
98f706de 1702 unix_state_lock(sk);
1da177e4 1703
1ca27e0c
KI
1704 if (unlikely(sk->sk_state != TCP_CLOSE)) {
1705 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1c92b4e5 1706 unix_state_unlock(sk);
1ca27e0c 1707 goto out_unlock;
1da177e4
LT
1708 }
1709
3610cda5 1710 err = security_unix_stream_connect(sk, other, newsk);
1da177e4 1711 if (err) {
1c92b4e5 1712 unix_state_unlock(sk);
1da177e4
LT
1713 goto out_unlock;
1714 }
1715
1716 /* The way is open! Fastly set all the necessary fields... */
1717
1718 sock_hold(sk);
3f84d577
KI
1719 unix_peer(newsk) = sk;
1720 newsk->sk_state = TCP_ESTABLISHED;
1721 newsk->sk_type = sk->sk_type;
1722 newsk->sk_scm_recv_flags = other->sk_scm_recv_flags;
fd0a109a 1723 init_peercred(newsk, &peercred);
3f84d577 1724
1da177e4 1725 newu = unix_sk(newsk);
aed6ecef 1726 newu->listener = other;
eaefd110 1727 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1da177e4
LT
1728 otheru = unix_sk(other);
1729
ae3b5641
AV
1730 /* copy address information from listening to new sock
1731 *
1732 * The contents of *(otheru->addr) and otheru->path
1733 * are seen fully set up here, since we have found
2f7ca90a
KI
1734 * otheru in hash under its lock. Insertion into the
1735 * hash chain we'd found it in had been done in an
1736 * earlier critical area protected by the chain's lock,
ae3b5641
AV
1737 * the same one where we'd set *(otheru->addr) contents,
1738 * as well as otheru->path and otheru->addr itself.
1739 *
1740 * Using smp_store_release() here to set newu->addr
1741 * is enough to make those stores, as well as stores
1742 * to newu->path visible to anyone who gets newu->addr
1743 * by smp_load_acquire(). IOW, the same warranties
1744 * as for unix_sock instances bound in unix_bind() or
1745 * in unix_autobind().
1746 */
40ffe67d
AV
1747 if (otheru->path.dentry) {
1748 path_get(&otheru->path);
1749 newu->path = otheru->path;
1da177e4 1750 }
ae3b5641
AV
1751 refcount_inc(&otheru->addr->refcnt);
1752 smp_store_release(&newu->addr, otheru->addr);
1da177e4
LT
1753
1754 /* Set credentials */
109f6e39 1755 copy_peercred(sk, other);
1da177e4 1756
1da177e4 1757 sock->state = SS_CONNECTED;
942238f9 1758 WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
830a1e5c
BL
1759 sock_hold(newsk);
1760
4e857c58 1761 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
830a1e5c 1762 unix_peer(sk) = newsk;
1da177e4 1763
1c92b4e5 1764 unix_state_unlock(sk);
1da177e4 1765
4e03d073 1766 /* queue the skb and notify the listening sock */
1da177e4
LT
1767 spin_lock(&other->sk_receive_queue.lock);
1768 __skb_queue_tail(&other->sk_receive_queue, skb);
1da177e4 1769 spin_unlock(&other->sk_receive_queue.lock);
1c92b4e5 1770 unix_state_unlock(other);
676d2369 1771 other->sk_data_ready(other);
1da177e4
LT
1772 sock_put(other);
1773 return 0;
1774
1775out_unlock:
e26ee0a7
KI
1776 unix_state_unlock(other);
1777 sock_put(other);
1778out_free_skb:
085e6cba 1779 consume_skb(skb);
e26ee0a7
KI
1780out_free_sk:
1781 unix_release_sock(newsk, 0);
1782out:
fd0a109a 1783 drop_peercred(&peercred);
1da177e4
LT
1784 return err;
1785}
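/*
 * Illustrative userspace sketch (not part of this file): the client side
 * of the handshake implemented above. "/tmp/demo.sock" is an assumed
 * example path. A blocking connect(2) may sleep in unix_wait_for_peer()
 * while the listener's backlog is full; with O_NONBLOCK it fails with
 * EAGAIN instead.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

int demo_stream_connect(void)
{
	struct sockaddr_un addr;
	int fd;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0) {
		perror("socket");
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	strncpy(addr.sun_path, "/tmp/demo.sock", sizeof(addr.sun_path) - 1);

	/* ECONNREFUSED here means nothing is listening at that path. */
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("connect");
		close(fd);
		return -1;
	}
	return fd;
}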
1786
1787static int unix_socketpair(struct socket *socka, struct socket *sockb)
1788{
fd0a109a 1789 struct unix_peercred ska_peercred = {}, skb_peercred = {};
e27dfcea 1790 struct sock *ska = socka->sk, *skb = sockb->sk;
fd0a109a
CB
1791 int err;
1792
1793 err = prepare_peercred(&ska_peercred);
1794 if (err)
1795 return err;
1796
1797 err = prepare_peercred(&skb_peercred);
1798 if (err) {
1799 drop_peercred(&ska_peercred);
1800 return err;
1801 }
1da177e4
LT
1802
1803 /* Join our sockets back to back */
1804 sock_hold(ska);
1805 sock_hold(skb);
e27dfcea
JK
1806 unix_peer(ska) = skb;
1807 unix_peer(skb) = ska;
fd0a109a
CB
1808 init_peercred(ska, &ska_peercred);
1809 init_peercred(skb, &skb_peercred);
1da177e4 1810
83301b53
CW
1811 ska->sk_state = TCP_ESTABLISHED;
1812 skb->sk_state = TCP_ESTABLISHED;
1813 socka->state = SS_CONNECTED;
1814 sockb->state = SS_CONNECTED;
1da177e4
LT
1815 return 0;
1816}
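/*
 * Illustrative userspace sketch (not part of this file): what the
 * back-to-back wiring above provides. Both ends come back already in
 * TCP_ESTABLISHED/SS_CONNECTED, so no bind/listen/accept is needed.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int demo_socketpair(void)
{
	char buf[16] = {};
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
		perror("socketpair");
		return -1;
	}

	/* Data written on one end is readable on the other. */
	if (write(sv[0], "ping", 4) != 4)
		perror("write");
	if (read(sv[1], buf, sizeof(buf)) < 0)
		perror("read");

	close(sv[0]);
	close(sv[1]);
	return 0;
}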
1817
92ef0fd5
JA
1818static int unix_accept(struct socket *sock, struct socket *newsock,
1819 struct proto_accept_arg *arg)
1da177e4
LT
1820{
1821 struct sock *sk = sock->sk;
1da177e4 1822 struct sk_buff *skb;
aed6ecef 1823 struct sock *tsk;
1da177e4 1824
92ef0fd5 1825 arg->err = -EOPNOTSUPP;
6eba6a37 1826 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1da177e4
LT
1827 goto out;
1828
92ef0fd5 1829 arg->err = -EINVAL;
1b536948 1830 if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
1da177e4
LT
1831 goto out;
1832
1833 /* If socket state is TCP_LISTEN it cannot change (for now...),
1834 * so that no locks are necessary.
1835 */
1836
92ef0fd5
JA
1837 skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1838 &arg->err);
1da177e4
LT
1839 if (!skb) {
1840 /* This means receive shutdown. */
92ef0fd5
JA
1841 if (arg->err == 0)
1842 arg->err = -EINVAL;
1da177e4
LT
1843 goto out;
1844 }
1845
1846 tsk = skb->sk;
1847 skb_free_datagram(sk, skb);
1848 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1849
1850 /* attach accepted sock to socket */
1c92b4e5 1851 unix_state_lock(tsk);
fd863448 1852 unix_update_edges(unix_sk(tsk));
1da177e4
LT
1853 newsock->state = SS_CONNECTED;
1854 sock_graft(tsk, newsock);
1c92b4e5 1855 unix_state_unlock(tsk);
1da177e4
LT
1856 return 0;
1857
1858out:
92ef0fd5 1859 return arg->err;
1da177e4
LT
1860}
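/*
 * Illustrative userspace sketch (not part of this file): the listener
 * side that unix_accept() serves. Each queued connect request is an skb
 * on the listener's receive queue; accept(2) detaches one and grafts the
 * embedded sock onto the new file. "/tmp/demo.sock" is an assumed
 * example path.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

int demo_stream_listener(void)
{
	struct sockaddr_un addr;
	int lfd, cfd;

	lfd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (lfd < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	strncpy(addr.sun_path, "/tmp/demo.sock", sizeof(addr.sun_path) - 1);
	unlink(addr.sun_path);	/* a stale inode would make bind() fail */

	if (bind(lfd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(lfd, 16) < 0) {
		close(lfd);
		return -1;
	}

	cfd = accept(lfd, NULL, NULL);	/* blocks unless O_NONBLOCK */
	if (cfd < 0)
		perror("accept");
	close(lfd);
	return cfd;
}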
1861
1862
9b2c45d4 1863static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1da177e4
LT
1864{
1865 struct sock *sk = sock->sk;
ae3b5641 1866 struct unix_address *addr;
13cfa97b 1867 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1da177e4
LT
1868 int err = 0;
1869
1870 if (peer) {
1871 sk = unix_peer_get(sk);
1872
1873 err = -ENOTCONN;
1874 if (!sk)
1875 goto out;
1876 err = 0;
1877 } else {
1878 sock_hold(sk);
1879 }
1880
ae3b5641
AV
1881 addr = smp_load_acquire(&unix_sk(sk)->addr);
1882 if (!addr) {
1da177e4
LT
1883 sunaddr->sun_family = AF_UNIX;
1884 sunaddr->sun_path[0] = 0;
755662ce 1885 err = offsetof(struct sockaddr_un, sun_path);
1da177e4 1886 } else {
9b2c45d4
DV
1887 err = addr->len;
1888 memcpy(sunaddr, addr->name, addr->len);
859051dd
DDM
1889
1890 if (peer)
1891 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1892 CGROUP_UNIX_GETPEERNAME);
1893 else
1894 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1895 CGROUP_UNIX_GETSOCKNAME);
1da177e4 1896 }
1da177e4
LT
1897 sock_put(sk);
1898out:
1899 return err;
1900}
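/*
 * Illustrative userspace sketch (not part of this file): reading the
 * address back as unix_getname() reports it. For an unbound socket the
 * returned length is just offsetof(struct sockaddr_un, sun_path); for an
 * abstract address sun_path[0] is '\0' and the name is not
 * NUL-terminated, so the returned length must be used to find its end.
 */
#include <stdio.h>
#include <stddef.h>
#include <sys/socket.h>
#include <sys/un.h>

void demo_print_local_name(int fd)
{
	struct sockaddr_un addr;
	socklen_t len = sizeof(addr);

	if (getsockname(fd, (struct sockaddr *)&addr, &len) < 0) {
		perror("getsockname");
		return;
	}

	if (len == offsetof(struct sockaddr_un, sun_path))
		printf("unbound\n");
	else if (addr.sun_path[0] == '\0')
		printf("abstract, %zu name bytes\n",
		       (size_t)len - offsetof(struct sockaddr_un, sun_path) - 1);
	else
		printf("pathname: %s\n", addr.sun_path);
}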
1901
99a7a5b9
KI
1902/* The "user->unix_inflight" variable is protected by the garbage
1903 * collection lock, and we just read it locklessly here. If you go
1904 * over the limit, there might be a tiny race in actually noticing
1905 * it across threads. Tough.
1906 */
1907static inline bool too_many_unix_fds(struct task_struct *p)
1908{
1909 struct user_struct *user = current_user();
1910
1911 if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
1912 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1913 return false;
1914}
1915
1916static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1917{
99a7a5b9
KI
1918 if (too_many_unix_fds(current))
1919 return -ETOOMANYREFS;
1920
7c349ed0
KI
1921 UNIXCB(skb).fp = scm->fp;
1922 scm->fp = NULL;
99a7a5b9 1923
1fbfdfaa
KI
1924 if (unix_prepare_fpl(UNIXCB(skb).fp))
1925 return -ENOMEM;
1926
99a7a5b9
KI
1927 return 0;
1928}
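/*
 * Illustrative userspace sketch (not part of this file): the sender
 * whose SCM_RIGHTS control message ends up in unix_attach_fds() above.
 * Exceeding RLIMIT_NOFILE worth of in-flight fds makes sendmsg(2) fail
 * with ETOOMANYREFS, matching too_many_unix_fds().
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

int demo_send_fd(int sock, int fd_to_pass)
{
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct msghdr msg = {};
	struct cmsghdr *cmsg;
	struct iovec iov;
	char byte = 0;

	iov.iov_base = &byte;	/* carry one data byte alongside the fd */
	iov.iov_len = 1;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));

	return sendmsg(sock, &msg, 0) == 1 ? 0 : -1;
}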
1929
1930static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1931{
99a7a5b9
KI
1932 scm->fp = UNIXCB(skb).fp;
1933 UNIXCB(skb).fp = NULL;
1934
1fbfdfaa 1935 unix_destroy_fpl(scm->fp);
99a7a5b9
KI
1936}
1937
cbcf0112
MS
1938static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1939{
1940 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
cbcf0112
MS
1941}
1942
99a7a5b9
KI
1943static void unix_destruct_scm(struct sk_buff *skb)
1944{
1945 struct scm_cookie scm;
1946
1947 memset(&scm, 0, sizeof(scm));
1948 scm.pid = UNIXCB(skb).pid;
1949 if (UNIXCB(skb).fp)
1950 unix_detach_fds(&scm, skb);
1951
1952 /* Alas, it calls VFS */
1953 /* So fscking what? fput() had been SMP-safe since the last Summer */
1954 scm_destroy(&scm);
1955 sock_wfree(skb);
1956}
1957
f78a5fda 1958static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
7361c36c
EB
1959{
1960 int err = 0;
16e57262 1961
3041bbbe 1962 UNIXCB(skb).pid = get_pid(scm->pid);
6b0ee8c0
EB
1963 UNIXCB(skb).uid = scm->creds.uid;
1964 UNIXCB(skb).gid = scm->creds.gid;
7361c36c 1965 UNIXCB(skb).fp = NULL;
37a9a8df 1966 unix_get_secdata(scm, skb);
7361c36c
EB
1967 if (scm->fp && send_fds)
1968 err = unix_attach_fds(scm, skb);
1969
1970 skb->destructor = unix_destruct_scm;
1971 return err;
1972}
1973
16e57262
ED
1974/*
1975 * Some apps rely on write() giving SCM_CREDENTIALS.
1976 * We include credentials if source or destination socket
1977 * asserted SOCK_PASSCRED.
1978 */
3041bbbe
KI
1979static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
1980 const struct sock *other)
16e57262 1981{
6b0ee8c0 1982 if (UNIXCB(skb).pid)
16e57262 1983 return;
350d4546 1984
43fb2b30
KI
1985 if (unix_may_passcred(sk) || unix_may_passcred(other) ||
1986 !other->sk_socket) {
3041bbbe 1987 UNIXCB(skb).pid = get_pid(task_tgid(current));
6e0895c2 1988 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
16e57262
ED
1989 }
1990}
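/*
 * Illustrative userspace sketch (not part of this file): the receiver
 * that the SOCK_PASSCRED logic above serves. Enabling SO_PASSCRED makes
 * recvmsg(2) carry an SCM_CREDENTIALS cmsg with the sender's
 * pid/uid/gid.
 */
#define _GNU_SOURCE		/* for struct ucred */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

void demo_recv_creds(int sock)
{
	char cbuf[CMSG_SPACE(sizeof(struct ucred))];
	struct msghdr msg = {};
	struct cmsghdr *cmsg;
	struct iovec iov;
	char data[64];
	int one = 1;

	setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));

	iov.iov_base = data;
	iov.iov_len = sizeof(data);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	if (recvmsg(sock, &msg, 0) < 0)
		return;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_CREDENTIALS) {
			struct ucred uc;

			memcpy(&uc, CMSG_DATA(cmsg), sizeof(uc));
			printf("pid=%ld uid=%u gid=%u\n",
			       (long)uc.pid, uc.uid, uc.gid);
		}
}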
1991
9490f886
HFS
1992static bool unix_skb_scm_eq(struct sk_buff *skb,
1993 struct scm_cookie *scm)
1994{
b146cbf2
KC
1995 return UNIXCB(skb).pid == scm->pid &&
1996 uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1997 gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
9490f886
HFS
1998 unix_secdata_eq(scm, skb);
1999}
2000
3c32da19
KT
2001static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
2002{
2003 struct scm_fp_list *fp = UNIXCB(skb).fp;
2004 struct unix_sock *u = unix_sk(sk);
2005
42f298c0 2006 if (unlikely(fp && fp->count)) {
7782040b 2007 atomic_add(fp->count, &u->scm_stat.nr_fds);
42f298c0
KI
2008 unix_add_edges(fp, u);
2009 }
3c32da19
KT
2010}
2011
2012static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
2013{
2014 struct scm_fp_list *fp = UNIXCB(skb).fp;
2015 struct unix_sock *u = unix_sk(sk);
2016
42f298c0 2017 if (unlikely(fp && fp->count)) {
7782040b 2018 atomic_sub(fp->count, &u->scm_stat.nr_fds);
42f298c0
KI
2019 unix_del_edges(fp);
2020 }
3c32da19
KT
2021}
2022
1da177e4
LT
2023/*
2024 * Send AF_UNIX data.
2025 */
2026
1b784140
YX
2027static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
2028 size_t len)
1da177e4 2029{
340c3d33
KI
2030 struct sock *sk = sock->sk, *other = NULL;
2031 struct unix_sock *u = unix_sk(sk);
7cc05662 2032 struct scm_cookie scm;
340c3d33 2033 struct sk_buff *skb;
eb6a2481 2034 int data_len = 0;
7d267278 2035 int sk_locked;
340c3d33
KI
2036 long timeo;
2037 int err;
1da177e4 2038
7cc05662 2039 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
2040 if (err < 0)
2041 return err;
2042
d9f21b36
KI
2043 wait_for_unix_gc(scm.fp);
2044
001a2508
KI
2045 if (msg->msg_flags & MSG_OOB) {
2046 err = -EOPNOTSUPP;
1da177e4 2047 goto out;
001a2508 2048 }
1da177e4
LT
2049
2050 if (msg->msg_namelen) {
3c05329a 2051 err = unix_validate_addr(msg->msg_name, msg->msg_namelen);
b8a58aa6
KI
2052 if (err)
2053 goto out;
859051dd
DDM
2054
2055 err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
2056 msg->msg_name,
2057 &msg->msg_namelen,
2058 NULL);
2059 if (err)
2060 goto out;
1da177e4
LT
2061 }
2062
350d4546 2063 if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
f7ed31f4
KI
2064 err = unix_autobind(sk);
2065 if (err)
2066 goto out;
2067 }
1da177e4 2068
001a2508
KI
2069 if (len > READ_ONCE(sk->sk_sndbuf) - 32) {
2070 err = -EMSGSIZE;
1da177e4 2071 goto out;
001a2508 2072 }
1da177e4 2073
31ff6aa5 2074 if (len > SKB_MAX_ALLOC) {
eb6a2481
ED
2075 data_len = min_t(size_t,
2076 len - SKB_MAX_ALLOC,
2077 MAX_SKB_FRAGS * PAGE_SIZE);
31ff6aa5
KT
2078 data_len = PAGE_ALIGN(data_len);
2079
2080 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
2081 }
eb6a2481
ED
2082
2083 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
28d64271
ED
2084 msg->msg_flags & MSG_DONTWAIT, &err,
2085 PAGE_ALLOC_COSTLY_ORDER);
62c6db25 2086 if (!skb)
1da177e4
LT
2087 goto out;
2088
7cc05662 2089 err = unix_scm_to_skb(&scm, skb, true);
25888e30 2090 if (err < 0)
7361c36c 2091 goto out_free;
877ce7c1 2092
eb6a2481
ED
2093 skb_put(skb, len - data_len);
2094 skb->data_len = data_len;
2095 skb->len = len;
c0371da6 2096 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1da177e4
LT
2097 if (err)
2098 goto out_free;
2099
2100 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
2101
62c6db25 2102 if (msg->msg_namelen) {
a700b433 2103lookup:
3c05329a 2104 other = unix_find_other(sock_net(sk), msg->msg_name,
a9194f88 2105 msg->msg_namelen, sk->sk_type, 0);
aed26f55
KI
2106 if (IS_ERR(other)) {
2107 err = PTR_ERR(other);
62c6db25
KI
2108 goto out_free;
2109 }
2110 } else {
2111 other = unix_peer_get(sk);
2112 if (!other) {
2113 err = -ENOTCONN;
1da177e4 2114 goto out_free;
aed26f55 2115 }
1da177e4
LT
2116 }
2117
d6ae3bae
AC
2118 if (sk_filter(other, skb) < 0) {
2119 /* Toss the packet but do not return any error to the sender */
2120 err = len;
62c6db25 2121 goto out_sock_put;
d6ae3bae
AC
2122 }
2123
a700b433 2124restart:
7d267278 2125 sk_locked = 0;
1c92b4e5 2126 unix_state_lock(other);
7d267278 2127restart_locked:
001a2508
KI
2128
2129 if (!unix_may_send(sk, other)) {
2130 err = -EPERM;
1da177e4 2131 goto out_unlock;
001a2508 2132 }
1da177e4 2133
7d267278 2134 if (unlikely(sock_flag(other, SOCK_DEAD))) {
106d979b 2135 /* Check with 1003.1g - what should a datagram error return? */
1da177e4 2136
106d979b 2137 unix_state_unlock(other);
7d267278 2138
3ff8bff7
KT
2139 if (sk->sk_type == SOCK_SEQPACKET) {
2140 /* We are here only when racing with unix_release_sock()
2141 * is clearing @other. Never change state to TCP_CLOSE
2142 * unlike SOCK_DGRAM wants.
2143 */
3ff8bff7 2144 err = -EPIPE;
62c6db25 2145 goto out_sock_put;
106d979b
KI
2146 }
2147
2148 if (!sk_locked)
2149 unix_state_lock(sk);
2150
2151 if (unix_peer(sk) == other) {
e27dfcea 2152 unix_peer(sk) = NULL;
7d267278
RW
2153 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2154
942238f9 2155 WRITE_ONCE(sk->sk_state, TCP_CLOSE);
1c92b4e5 2156 unix_state_unlock(sk);
1da177e4
LT
2157
2158 unix_dgram_disconnected(sk, other);
2159 sock_put(other);
2160 err = -ECONNREFUSED;
62c6db25 2161 goto out_sock_put;
1da177e4
LT
2162 }
2163
106d979b
KI
2164 unix_state_unlock(sk);
2165
2166 if (!msg->msg_namelen) {
2167 err = -ECONNRESET;
62c6db25 2168 goto out_sock_put;
106d979b 2169 }
a700b433 2170
bc23d4e3 2171 sock_put(other);
a700b433 2172 goto lookup;
1da177e4
LT
2173 }
2174
001a2508
KI
2175 if (other->sk_shutdown & RCV_SHUTDOWN) {
2176 err = -EPIPE;
1da177e4 2177 goto out_unlock;
001a2508 2178 }
1da177e4 2179
77cbe1a6
KI
2180 if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2181 err = -EPERM;
2182 goto out_unlock;
2183 }
2184
1da177e4
LT
2185 if (sk->sk_type != SOCK_SEQPACKET) {
2186 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2187 if (err)
2188 goto out_unlock;
2189 }
2190
a5527dda
RW
2191 /* other == sk && unix_peer(other) != sk if
2192 * - unix_peer(sk) == NULL, destination address bound to sk
2193 * - unix_peer(sk) == sk by time of get but disconnected before lock
2194 */
2195 if (other != sk &&
86b18aaa
QC
2196 unlikely(unix_peer(other) != sk &&
2197 unix_recvq_full_lockless(other))) {
7d267278
RW
2198 if (timeo) {
2199 timeo = unix_wait_for_peer(other, timeo);
2200
2201 err = sock_intr_errno(timeo);
2202 if (signal_pending(current))
62c6db25 2203 goto out_sock_put;
7d267278
RW
2204
2205 goto restart;
1da177e4
LT
2206 }
2207
7d267278
RW
2208 if (!sk_locked) {
2209 unix_state_unlock(other);
2210 unix_state_double_lock(sk, other);
2211 }
1da177e4 2212
7d267278
RW
2213 if (unix_peer(sk) != other ||
2214 unix_dgram_peer_wake_me(sk, other)) {
2215 err = -EAGAIN;
2216 sk_locked = 1;
2217 goto out_unlock;
2218 }
1da177e4 2219
7d267278
RW
2220 if (!sk_locked) {
2221 sk_locked = 1;
2222 goto restart_locked;
2223 }
1da177e4
LT
2224 }
2225
7d267278
RW
2226 if (unlikely(sk_locked))
2227 unix_state_unlock(sk);
2228
3f66116e
AC
2229 if (sock_flag(other, SOCK_RCVTSTAMP))
2230 __net_timestamp(skb);
3041bbbe
KI
2231
2232 unix_maybe_add_creds(skb, sk, other);
3c32da19 2233 scm_stat_add(other, skb);
7782040b 2234 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 2235 unix_state_unlock(other);
676d2369 2236 other->sk_data_ready(other);
1da177e4 2237 sock_put(other);
7cc05662 2238 scm_destroy(&scm);
1da177e4
LT
2239 return len;
2240
2241out_unlock:
7d267278
RW
2242 if (sk_locked)
2243 unix_state_unlock(sk);
1c92b4e5 2244 unix_state_unlock(other);
62c6db25
KI
2245out_sock_put:
2246 sock_put(other);
1da177e4 2247out_free:
085e6cba 2248 consume_skb(skb);
1da177e4 2249out:
7cc05662 2250 scm_destroy(&scm);
1da177e4
LT
2251 return err;
2252}
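/*
 * Illustrative userspace sketch (not part of this file): a datagram
 * send as handled above. An unconnected SOCK_DGRAM socket names its
 * destination per call; when the receiver's queue is full a blocking
 * sender sleeps in unix_wait_for_peer(), a non-blocking one gets EAGAIN.
 * "/tmp/demo.dgram" is an assumed example path.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

int demo_dgram_send(const char *text)
{
	struct sockaddr_un dst;
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;

	memset(&dst, 0, sizeof(dst));
	dst.sun_family = AF_UNIX;
	strncpy(dst.sun_path, "/tmp/demo.dgram", sizeof(dst.sun_path) - 1);

	ret = sendto(fd, text, strlen(text), 0,
		     (struct sockaddr *)&dst, sizeof(dst));
	if (ret < 0)
		perror("sendto");	/* fails if nothing is bound there */
	close(fd);
	return ret;
}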
2253
e370a723 2254/* We use paged skbs for stream sockets, and limit occupancy to 32768
d4e9a408 2255 * bytes, and a minimum of a full page.
e370a723
ED
2256 */
2257#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
ac7bfa62 2258
4edf21aa 2259#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3041bbbe 2260static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
2aab4b96 2261 struct scm_cookie *scm, bool fds_sent)
314001f0
RS
2262{
2263 struct unix_sock *ousk = unix_sk(other);
2264 struct sk_buff *skb;
085e6cba 2265 int err;
314001f0 2266
3041bbbe 2267 skb = sock_alloc_send_skb(sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
314001f0
RS
2268
2269 if (!skb)
2270 return err;
2271
2aab4b96 2272 err = unix_scm_to_skb(scm, skb, !fds_sent);
085e6cba
KI
2273 if (err < 0)
2274 goto out;
2275
314001f0 2276 skb_put(skb, 1);
314001f0
RS
2277 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2278
085e6cba
KI
2279 if (err)
2280 goto out;
314001f0
RS
2281
2282 unix_state_lock(other);
19eed721
RS
2283
2284 if (sock_flag(other, SOCK_DEAD) ||
2285 (other->sk_shutdown & RCV_SHUTDOWN)) {
085e6cba 2286 err = -EPIPE;
77cbe1a6 2287 goto out_unlock;
19eed721
RS
2288 }
2289
77cbe1a6
KI
2290 if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2291 err = -EPERM;
2292 goto out_unlock;
19eed721
RS
2293 }
2294
3041bbbe 2295 unix_maybe_add_creds(skb, sk, other);
9841991a
KI
2296 scm_stat_add(other, skb);
2297
2298 spin_lock(&other->sk_receive_queue.lock);
e82025c6 2299 WRITE_ONCE(ousk->oob_skb, skb);
9841991a
KI
2300 __skb_queue_tail(&other->sk_receive_queue, skb);
2301 spin_unlock(&other->sk_receive_queue.lock);
314001f0 2302
314001f0
RS
2303 sk_send_sigurg(other);
2304 unix_state_unlock(other);
2305 other->sk_data_ready(other);
2306
085e6cba 2307 return 0;
77cbe1a6
KI
2308out_unlock:
2309 unix_state_unlock(other);
085e6cba
KI
2310out:
2311 consume_skb(skb);
314001f0
RS
2312 return err;
2313}
2314#endif
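/*
 * Illustrative userspace sketch (not part of this file): exercising the
 * MSG_OOB path above, which queues a single "urgent" byte. The receiver
 * pulls it out of band with recv(MSG_OOB) unless SO_OOBINLINE is set.
 * Requires CONFIG_AF_UNIX_OOB; otherwise send(2) fails with EOPNOTSUPP.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

void demo_oob(void)
{
	char c = 0;
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return;

	if (send(sv[0], "x", 1, MSG_OOB) < 0)
		perror("send MSG_OOB");
	else if (recv(sv[1], &c, 1, MSG_OOB) == 1)
		printf("urgent byte: %c\n", c);

	close(sv[0]);
	close(sv[1]);
}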
2315
1b784140
YX
2316static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2317 size_t len)
1da177e4 2318{
1da177e4 2319 struct sock *sk = sock->sk;
3b2d40dc 2320 struct sk_buff *skb = NULL;
1da177e4 2321 struct sock *other = NULL;
7cc05662 2322 struct scm_cookie scm;
8ba69ba6 2323 bool fds_sent = false;
3b2d40dc 2324 int err, sent = 0;
1da177e4 2325
7cc05662 2326 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
2327 if (err < 0)
2328 return err;
2329
d9f21b36
KI
2330 wait_for_unix_gc(scm.fp);
2331
314001f0 2332 if (msg->msg_flags & MSG_OOB) {
6c444255 2333 err = -EOPNOTSUPP;
4edf21aa 2334#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
314001f0
RS
2335 if (len)
2336 len--;
2337 else
2338#endif
2339 goto out_err;
2340 }
1da177e4
LT
2341
2342 if (msg->msg_namelen) {
8a34d4e8 2343 err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1da177e4
LT
2344 goto out_err;
2345 } else {
830a1e5c 2346 other = unix_peer(sk);
6c444255
KI
2347 if (!other) {
2348 err = -ENOTCONN;
1da177e4 2349 goto out_err;
6c444255 2350 }
1da177e4
LT
2351 }
2352
3b2d40dc
KI
2353 if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2354 goto out_pipe;
1da177e4 2355
6eba6a37 2356 while (sent < len) {
3b2d40dc
KI
2357 int size = len - sent;
2358 int data_len;
1da177e4 2359
a0dbf5f8
DH
2360 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2361 skb = sock_alloc_send_pskb(sk, 0, 0,
2362 msg->msg_flags & MSG_DONTWAIT,
2363 &err, 0);
2364 } else {
2365 /* Keep two messages in the pipe so it schedules better */
b0632e53 2366 size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
1da177e4 2367
a0dbf5f8
DH
2368 /* allow fallback to order-0 allocations */
2369 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
ac7bfa62 2370
a0dbf5f8 2371 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1da177e4 2372
a0dbf5f8 2373 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
31ff6aa5 2374
a0dbf5f8
DH
2375 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2376 msg->msg_flags & MSG_DONTWAIT, &err,
2377 get_order(UNIX_SKB_FRAGS_SZ));
2378 }
e370a723 2379 if (!skb)
1da177e4
LT
2380 goto out_err;
2381
f78a5fda 2382 /* Only send the fds in the first buffer */
7cc05662 2383 err = unix_scm_to_skb(&scm, skb, !fds_sent);
d460b04b
KI
2384 if (err < 0)
2385 goto out_free;
2386
7361c36c 2387 fds_sent = true;
1da177e4 2388
a0dbf5f8 2389 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
6bd8614f 2390 skb->ip_summed = CHECKSUM_UNNECESSARY;
a0dbf5f8
DH
2391 err = skb_splice_from_iter(skb, &msg->msg_iter, size,
2392 sk->sk_allocation);
d460b04b
KI
2393 if (err < 0)
2394 goto out_free;
2395
a0dbf5f8
DH
2396 size = err;
2397 refcount_add(size, &sk->sk_wmem_alloc);
2398 } else {
2399 skb_put(skb, size - data_len);
2400 skb->data_len = data_len;
2401 skb->len = size;
2402 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
d460b04b
KI
2403 if (err)
2404 goto out_free;
1da177e4
LT
2405 }
2406
1c92b4e5 2407 unix_state_lock(other);
1da177e4
LT
2408
2409 if (sock_flag(other, SOCK_DEAD) ||
2410 (other->sk_shutdown & RCV_SHUTDOWN))
3b2d40dc 2411 goto out_pipe_unlock;
1da177e4 2412
77cbe1a6
KI
2413 if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2414 unix_state_unlock(other);
2415 err = -EPERM;
2416 goto out_free;
2417 }
2418
3041bbbe 2419 unix_maybe_add_creds(skb, sk, other);
3c32da19 2420 scm_stat_add(other, skb);
7782040b 2421 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 2422 unix_state_unlock(other);
676d2369 2423 other->sk_data_ready(other);
e27dfcea 2424 sent += size;
1da177e4 2425 }
1da177e4 2426
4edf21aa 2427#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
314001f0 2428 if (msg->msg_flags & MSG_OOB) {
3041bbbe 2429 err = queue_oob(sk, msg, other, &scm, fds_sent);
314001f0
RS
2430 if (err)
2431 goto out_err;
2432 sent++;
2433 }
2434#endif
2435
7cc05662 2436 scm_destroy(&scm);
1da177e4
LT
2437
2438 return sent;
2439
3b2d40dc 2440out_pipe_unlock:
1c92b4e5 2441 unix_state_unlock(other);
3b2d40dc 2442out_pipe:
d460b04b 2443 if (!sent && !(msg->msg_flags & MSG_NOSIGNAL))
6eba6a37 2444 send_sig(SIGPIPE, current, 0);
1da177e4 2445 err = -EPIPE;
d460b04b 2446out_free:
085e6cba 2447 consume_skb(skb);
1da177e4 2448out_err:
7cc05662 2449 scm_destroy(&scm);
1da177e4
LT
2450 return sent ? : err;
2451}
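/*
 * Illustrative userspace sketch (not part of this file): the out_pipe
 * path above. Writing to a stream socket whose peer is gone raises
 * SIGPIPE and returns EPIPE; MSG_NOSIGNAL suppresses the signal while
 * keeping the EPIPE error.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

void demo_epipe(void)
{
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return;

	close(sv[1]);		/* kill the peer */

	if (send(sv[0], "x", 1, MSG_NOSIGNAL) < 0 && errno == EPIPE)
		printf("got EPIPE, no SIGPIPE delivered\n");

	close(sv[0]);
}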
2452
1b784140
YX
2453static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2454 size_t len)
1da177e4
LT
2455{
2456 int err;
2457 struct sock *sk = sock->sk;
ac7bfa62 2458
1da177e4
LT
2459 err = sock_error(sk);
2460 if (err)
2461 return err;
2462
8a34d4e8 2463 if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
1da177e4
LT
2464 return -ENOTCONN;
2465
2466 if (msg->msg_namelen)
2467 msg->msg_namelen = 0;
2468
1b784140 2469 return unix_dgram_sendmsg(sock, msg, len);
1da177e4 2470}
ac7bfa62 2471
1b784140
YX
2472static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2473 size_t size, int flags)
a05d2ad1
EB
2474{
2475 struct sock *sk = sock->sk;
2476
8a34d4e8 2477 if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
a05d2ad1
EB
2478 return -ENOTCONN;
2479
1b784140 2480 return unix_dgram_recvmsg(sock, msg, size, flags);
a05d2ad1
EB
2481}
2482
1da177e4
LT
2483static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2484{
ae3b5641 2485 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
1da177e4 2486
ae3b5641
AV
2487 if (addr) {
2488 msg->msg_namelen = addr->len;
2489 memcpy(msg->msg_name, addr->name, addr->len);
1da177e4
LT
2490 }
2491}
2492
9825d866
CW
2493int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2494 int flags)
1da177e4 2495{
7cc05662 2496 struct scm_cookie scm;
9825d866 2497 struct socket *sock = sk->sk_socket;
1da177e4 2498 struct unix_sock *u = unix_sk(sk);
64874280
RW
2499 struct sk_buff *skb, *last;
2500 long timeo;
fd69c399 2501 int skip;
1da177e4
LT
2502 int err;
2503
2504 err = -EOPNOTSUPP;
2505 if (flags&MSG_OOB)
2506 goto out;
2507
64874280 2508 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1da177e4 2509
64874280 2510 do {
6e1ce3c3 2511 mutex_lock(&u->iolock);
f55bb7f9 2512
64874280 2513 skip = sk_peek_offset(sk, flags);
b50b0580 2514 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
e427cad6
PA
2515 &skip, &err, &last);
2516 if (skb) {
2517 if (!(flags & MSG_PEEK))
2518 scm_stat_del(sk, skb);
64874280 2519 break;
e427cad6 2520 }
64874280 2521
6e1ce3c3 2522 mutex_unlock(&u->iolock);
64874280
RW
2523
2524 if (err != -EAGAIN)
2525 break;
2526 } while (timeo &&
b50b0580
SD
2527 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2528 &err, &timeo, last));
64874280 2529
6e1ce3c3 2530 if (!skb) { /* implies iolock unlocked */
0a112258
FZ
2531 unix_state_lock(sk);
2532 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2533 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2534 (sk->sk_shutdown & RCV_SHUTDOWN))
2535 err = 0;
2536 unix_state_unlock(sk);
64874280 2537 goto out;
0a112258 2538 }
1da177e4 2539
77b75f4d
RW
2540 if (wq_has_sleeper(&u->peer_wait))
2541 wake_up_interruptible_sync_poll(&u->peer_wait,
a9a08845
LT
2542 EPOLLOUT | EPOLLWRNORM |
2543 EPOLLWRBAND);
1da177e4 2544
859051dd 2545 if (msg->msg_name) {
1da177e4
LT
2546 unix_copy_addr(msg, skb->sk);
2547
859051dd
DDM
2548 BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2549 msg->msg_name,
2550 &msg->msg_namelen);
2551 }
2552
f55bb7f9
PE
2553 if (size > skb->len - skip)
2554 size = skb->len - skip;
2555 else if (size < skb->len - skip)
1da177e4
LT
2556 msg->msg_flags |= MSG_TRUNC;
2557
51f3d02b 2558 err = skb_copy_datagram_msg(skb, skip, msg, size);
1da177e4
LT
2559 if (err)
2560 goto out_free;
2561
3f66116e
AC
2562 if (sock_flag(sk, SOCK_RCVTSTAMP))
2563 __sock_recv_timestamp(msg, sk, skb);
2564
7cc05662
CH
2565 memset(&scm, 0, sizeof(scm));
2566
2567 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2568 unix_set_secdata(&scm, skb);
1da177e4 2569
6eba6a37 2570 if (!(flags & MSG_PEEK)) {
1da177e4 2571 if (UNIXCB(skb).fp)
7cc05662 2572 unix_detach_fds(&scm, skb);
f55bb7f9
PE
2573
2574 sk_peek_offset_bwd(sk, skb->len);
6eba6a37 2575 } else {
1da177e4
LT
2576 /* It is questionable: on PEEK we could:
2577 - do not return fds - good, but too simple 8)
2578 - return fds, and do not return them on read (old strategy,
2579 apparently wrong)
2580 - clone fds (I chose it for now, it is the most universal
2581 solution)
ac7bfa62
YH
2582
2583 POSIX 1003.1g does not actually define this clearly
2584 at all. POSIX 1003.1g doesn't define a lot of things
2585 clearly however!
2586
1da177e4 2587 */
f55bb7f9
PE
2588
2589 sk_peek_offset_fwd(sk, size);
2590
1da177e4 2591 if (UNIXCB(skb).fp)
cbcf0112 2592 unix_peek_fds(&scm, skb);
1da177e4 2593 }
9f6f9af7 2594 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1da177e4 2595
a9c49cc2 2596 scm_recv_unix(sock, msg, &scm, flags);
1da177e4
LT
2597
2598out_free:
6eba6a37 2599 skb_free_datagram(sk, skb);
6e1ce3c3 2600 mutex_unlock(&u->iolock);
1da177e4
LT
2601out:
2602 return err;
2603}
29df44fa 2604
9825d866
CW
2605static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2606 int flags)
2607{
2608 struct sock *sk = sock->sk;
2609
2610#ifdef CONFIG_BPF_SYSCALL
94531cfc
JW
2611 const struct proto *prot = READ_ONCE(sk->sk_prot);
2612
2613 if (prot != &unix_dgram_proto)
ec095263 2614 return prot->recvmsg(sk, msg, size, flags, NULL);
9825d866
CW
2615#endif
2616 return __unix_dgram_recvmsg(sk, msg, size, flags);
2617}
2618
965b57b4 2619static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
29df44fa 2620{
d6e3b27c
PY
2621 struct unix_sock *u = unix_sk(sk);
2622 struct sk_buff *skb;
78fa0d61 2623 int err;
29df44fa 2624
d6e3b27c
PY
2625 mutex_lock(&u->iolock);
2626 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2627 mutex_unlock(&u->iolock);
2628 if (!skb)
2629 return err;
29df44fa 2630
78fa0d61 2631 return recv_actor(sk, skb);
29df44fa 2632}
1da177e4
LT
2633
2634/*
79f632c7 2635 * Sleep until more data has arrived. But check for races.
1da177e4 2636 */
79f632c7 2637static long unix_stream_data_wait(struct sock *sk, long timeo,
06a77b07
WC
2638 struct sk_buff *last, unsigned int last_len,
2639 bool freezable)
1da177e4 2640{
f5d39b02 2641 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2b514574 2642 struct sk_buff *tail;
1da177e4
LT
2643 DEFINE_WAIT(wait);
2644
1c92b4e5 2645 unix_state_lock(sk);
1da177e4
LT
2646
2647 for (;;) {
f5d39b02 2648 prepare_to_wait(sk_sleep(sk), &wait, state);
1da177e4 2649
2b514574
HFS
2650 tail = skb_peek_tail(&sk->sk_receive_queue);
2651 if (tail != last ||
2652 (tail && tail->len != last_len) ||
1da177e4
LT
2653 sk->sk_err ||
2654 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2655 signal_pending(current) ||
2656 !timeo)
2657 break;
2658
9cd3e072 2659 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1c92b4e5 2660 unix_state_unlock(sk);
f5d39b02 2661 timeo = schedule_timeout(timeo);
1c92b4e5 2662 unix_state_lock(sk);
b48732e4
MS
2663
2664 if (sock_flag(sk, SOCK_DEAD))
2665 break;
2666
9cd3e072 2667 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1da177e4
LT
2668 }
2669
aa395145 2670 finish_wait(sk_sleep(sk), &wait);
1c92b4e5 2671 unix_state_unlock(sk);
1da177e4
LT
2672 return timeo;
2673}
2674
2b514574
HFS
2675struct unix_stream_read_state {
2676 int (*recv_actor)(struct sk_buff *, int, int,
2677 struct unix_stream_read_state *);
2678 struct socket *socket;
2679 struct msghdr *msg;
2680 struct pipe_inode_info *pipe;
2681 size_t size;
2682 int flags;
2683 unsigned int splice_flags;
2684};
2685
314001f0
RS
2686#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2687static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2688{
32ca2454 2689 struct sk_buff *oob_skb, *read_skb = NULL;
314001f0
RS
2690 struct socket *sock = state->socket;
2691 struct sock *sk = sock->sk;
2692 struct unix_sock *u = unix_sk(sk);
2693 int chunk = 1;
2694
876c14ad
RS
2695 mutex_lock(&u->iolock);
2696 unix_state_lock(sk);
9841991a 2697 spin_lock(&sk->sk_receive_queue.lock);
876c14ad
RS
2698
2699 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
9841991a 2700 spin_unlock(&sk->sk_receive_queue.lock);
876c14ad
RS
2701 unix_state_unlock(sk);
2702 mutex_unlock(&u->iolock);
314001f0 2703 return -EINVAL;
876c14ad 2704 }
314001f0 2705
876c14ad 2706 oob_skb = u->oob_skb;
314001f0 2707
32ca2454 2708 if (!(state->flags & MSG_PEEK)) {
e82025c6 2709 WRITE_ONCE(u->oob_skb, NULL);
9841991a 2710
32ca2454
KI
2711 if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
2712 !unix_skb_len(oob_skb->prev)) {
2713 read_skb = oob_skb->prev;
2714 __skb_unlink(read_skb, &sk->sk_receive_queue);
2715 }
2716 }
2717
9841991a 2718 spin_unlock(&sk->sk_receive_queue.lock);
876c14ad
RS
2719 unix_state_unlock(sk);
2720
2721 chunk = state->recv_actor(oob_skb, 0, chunk, state);
2722
4b7b4926 2723 if (!(state->flags & MSG_PEEK))
876c14ad 2724 UNIXCB(oob_skb).consumed += 1;
4b7b4926 2725
876c14ad
RS
2726 mutex_unlock(&u->iolock);
2727
32ca2454
KI
2728 consume_skb(read_skb);
2729
876c14ad
RS
2730 if (chunk < 0)
2731 return -EFAULT;
2732
314001f0
RS
2733 state->msg->msg_flags |= MSG_OOB;
2734 return 1;
2735}
2736
2737static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2738 int flags, int copied)
2739{
beb2c5f1 2740 struct sk_buff *read_skb = NULL, *unread_skb = NULL;
314001f0
RS
2741 struct unix_sock *u = unix_sk(sk);
2742
a0264a9f
KI
2743 if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb)))
2744 return skb;
93c99f21 2745
a0264a9f
KI
2746 spin_lock(&sk->sk_receive_queue.lock);
2747
2748 if (!unix_skb_len(skb)) {
36893ef0 2749 if (copied && (!u->oob_skb || skb == u->oob_skb)) {
93c99f21
KI
2750 skb = NULL;
2751 } else if (flags & MSG_PEEK) {
2752 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2753 } else {
beb2c5f1 2754 read_skb = skb;
93c99f21 2755 skb = skb_peek_next(skb, &sk->sk_receive_queue);
beb2c5f1 2756 __skb_unlink(read_skb, &sk->sk_receive_queue);
b94038d8
KI
2757 }
2758
5aa57d9f
KI
2759 if (!skb)
2760 goto unlock;
579770dd 2761 }
9841991a 2762
579770dd
KI
2763 if (skb != u->oob_skb)
2764 goto unlock;
9841991a 2765
579770dd
KI
2766 if (copied) {
2767 skb = NULL;
2768 } else if (!(flags & MSG_PEEK)) {
2769 WRITE_ONCE(u->oob_skb, NULL);
9841991a 2770
579770dd
KI
2771 if (!sock_flag(sk, SOCK_URGINLINE)) {
2772 __skb_unlink(skb, &sk->sk_receive_queue);
beb2c5f1 2773 unread_skb = skb;
579770dd
KI
2774 skb = skb_peek(&sk->sk_receive_queue);
2775 }
2776 } else if (!sock_flag(sk, SOCK_URGINLINE)) {
2777 skb = skb_peek_next(skb, &sk->sk_receive_queue);
314001f0 2778 }
579770dd
KI
2779
2780unlock:
2781 spin_unlock(&sk->sk_receive_queue.lock);
2782
a0264a9f 2783 consume_skb(read_skb);
533643b0 2784 kfree_skb_reason(unread_skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
579770dd 2785
314001f0
RS
2786 return skb;
2787}
2788#endif
2789
965b57b4 2790static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
77462de1 2791{
638f3260
ML
2792 struct unix_sock *u = unix_sk(sk);
2793 struct sk_buff *skb;
2794 int err;
2795
af4c733b 2796 if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
77462de1
JW
2797 return -ENOTCONN;
2798
638f3260
ML
2799 mutex_lock(&u->iolock);
2800 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2801 mutex_unlock(&u->iolock);
2802 if (!skb)
2803 return err;
2804
2805#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2806 if (unlikely(skb == READ_ONCE(u->oob_skb))) {
2807 bool drop = false;
2808
2809 unix_state_lock(sk);
2810
2811 if (sock_flag(sk, SOCK_DEAD)) {
2812 unix_state_unlock(sk);
bace4b46 2813 kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
638f3260
ML
2814 return -ECONNRESET;
2815 }
2816
2817 spin_lock(&sk->sk_receive_queue.lock);
2818 if (likely(skb == u->oob_skb)) {
2819 WRITE_ONCE(u->oob_skb, NULL);
2820 drop = true;
2821 }
2822 spin_unlock(&sk->sk_receive_queue.lock);
2823
2824 unix_state_unlock(sk);
2825
2826 if (drop) {
bace4b46 2827 kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
638f3260
ML
2828 return -EAGAIN;
2829 }
2830 }
2831#endif
2832
2833 return recv_actor(sk, skb);
77462de1
JW
2834}
2835
06a77b07
WC
2836static int unix_stream_read_generic(struct unix_stream_read_state *state,
2837 bool freezable)
1da177e4 2838{
7cc05662 2839 struct scm_cookie scm;
2b514574 2840 struct socket *sock = state->socket;
1da177e4
LT
2841 struct sock *sk = sock->sk;
2842 struct unix_sock *u = unix_sk(sk);
1da177e4 2843 int copied = 0;
2b514574 2844 int flags = state->flags;
de144391 2845 int noblock = flags & MSG_DONTWAIT;
2b514574 2846 bool check_creds = false;
1da177e4
LT
2847 int target;
2848 int err = 0;
2849 long timeo;
fc0d7536 2850 int skip;
2b514574
HFS
2851 size_t size = state->size;
2852 unsigned int last_len;
1da177e4 2853
8a34d4e8 2854 if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
1b92ee3d 2855 err = -EINVAL;
1da177e4 2856 goto out;
1b92ee3d 2857 }
1da177e4 2858
1b92ee3d
RW
2859 if (unlikely(flags & MSG_OOB)) {
2860 err = -EOPNOTSUPP;
314001f0 2861#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
314001f0 2862 err = unix_stream_recv_urg(state);
314001f0 2863#endif
1da177e4 2864 goto out;
1b92ee3d 2865 }
1da177e4 2866
2b514574 2867 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
de144391 2868 timeo = sock_rcvtimeo(sk, noblock);
1da177e4 2869
2b514574
HFS
2870 memset(&scm, 0, sizeof(scm));
2871
1da177e4
LT
2872 /* Lock the socket to prevent queue disordering
2873 * while we sleep copying data out to the msg
2874 */
6e1ce3c3 2875 mutex_lock(&u->iolock);
1da177e4 2876
a0917e0b 2877 skip = max(sk_peek_offset(sk, flags), 0);
e9193d60 2878
6eba6a37 2879 do {
79f632c7 2880 struct sk_buff *skb, *last;
b5c08988 2881 int chunk;
1da177e4 2882
18eceb81 2883redo:
3c0d2f37 2884 unix_state_lock(sk);
b48732e4
MS
2885 if (sock_flag(sk, SOCK_DEAD)) {
2886 err = -ECONNRESET;
2887 goto unlock;
2888 }
79f632c7 2889 last = skb = skb_peek(&sk->sk_receive_queue);
2b514574 2890 last_len = last ? last->len : 0;
314001f0 2891
283454c8 2892again:
314001f0
RS
2893#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2894 if (skb) {
2895 skb = manage_oob(skb, sk, flags, copied);
22dd70eb 2896 if (!skb && copied) {
314001f0 2897 unix_state_unlock(sk);
22dd70eb 2898 break;
314001f0
RS
2899 }
2900 }
2901#endif
6eba6a37 2902 if (skb == NULL) {
1da177e4 2903 if (copied >= target)
3c0d2f37 2904 goto unlock;
1da177e4
LT
2905
2906 /*
2907 * POSIX 1003.1g mandates this order.
2908 */
ac7bfa62 2909
6eba6a37
ED
2910 err = sock_error(sk);
2911 if (err)
3c0d2f37 2912 goto unlock;
1da177e4 2913 if (sk->sk_shutdown & RCV_SHUTDOWN)
3c0d2f37
MS
2914 goto unlock;
2915
2916 unix_state_unlock(sk);
1b92ee3d
RW
2917 if (!timeo) {
2918 err = -EAGAIN;
1da177e4 2919 break;
1b92ee3d
RW
2920 }
2921
6e1ce3c3 2922 mutex_unlock(&u->iolock);
1da177e4 2923
2b514574 2924 timeo = unix_stream_data_wait(sk, timeo, last,
06a77b07 2925 last_len, freezable);
1da177e4 2926
3822b5c2 2927 if (signal_pending(current)) {
1da177e4 2928 err = sock_intr_errno(timeo);
fa0dc04d 2929 scm_destroy(&scm);
1da177e4
LT
2930 goto out;
2931 }
b3ca9b02 2932
6e1ce3c3 2933 mutex_lock(&u->iolock);
18eceb81 2934 goto redo;
2b514574 2935unlock:
3c0d2f37
MS
2936 unix_state_unlock(sk);
2937 break;
1da177e4 2938 }
fc0d7536 2939
e370a723
ED
2940 while (skip >= unix_skb_len(skb)) {
2941 skip -= unix_skb_len(skb);
79f632c7 2942 last = skb;
2b514574 2943 last_len = skb->len;
fc0d7536 2944 skb = skb_peek_next(skb, &sk->sk_receive_queue);
79f632c7
BP
2945 if (!skb)
2946 goto again;
fc0d7536
PE
2947 }
2948
3c0d2f37 2949 unix_state_unlock(sk);
1da177e4
LT
2950
2951 if (check_creds) {
2952 /* Never glue messages from different writers */
9490f886 2953 if (!unix_skb_scm_eq(skb, &scm))
1da177e4 2954 break;
350d4546 2955 } else if (unix_may_passcred(sk)) {
1da177e4 2956 /* Copy credentials */
7cc05662 2957 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
37a9a8df 2958 unix_set_secdata(&scm, skb);
2b514574 2959 check_creds = true;
1da177e4
LT
2960 }
2961
2962 /* Copy address just once */
2b514574
HFS
2963 if (state->msg && state->msg->msg_name) {
2964 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2965 state->msg->msg_name);
2966 unix_copy_addr(state->msg, skb->sk);
859051dd
DDM
2967
2968 BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2969 state->msg->msg_name,
2970 &state->msg->msg_namelen);
2971
1da177e4
LT
2972 sunaddr = NULL;
2973 }
2974
e370a723 2975 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2b514574
HFS
2976 chunk = state->recv_actor(skb, skip, chunk, state);
2977 if (chunk < 0) {
1da177e4
LT
2978 if (copied == 0)
2979 copied = -EFAULT;
2980 break;
2981 }
2982 copied += chunk;
2983 size -= chunk;
2984
2985 /* Mark read part of skb as used */
6eba6a37 2986 if (!(flags & MSG_PEEK)) {
e370a723 2987 UNIXCB(skb).consumed += chunk;
1da177e4 2988
fc0d7536
PE
2989 sk_peek_offset_bwd(sk, chunk);
2990
3c32da19 2991 if (UNIXCB(skb).fp) {
3c32da19 2992 scm_stat_del(sk, skb);
7cc05662 2993 unix_detach_fds(&scm, skb);
3c32da19 2994 }
1da177e4 2995
e370a723 2996 if (unix_skb_len(skb))
1da177e4 2997 break;
1da177e4 2998
6f01fd6e 2999 skb_unlink(skb, &sk->sk_receive_queue);
70d4bf6d 3000 consume_skb(skb);
1da177e4 3001
7cc05662 3002 if (scm.fp)
1da177e4 3003 break;
6eba6a37 3004 } else {
1da177e4
LT
3005 /* It is questionable, see note in unix_dgram_recvmsg.
3006 */
3007 if (UNIXCB(skb).fp)
cbcf0112 3008 unix_peek_fds(&scm, skb);
1da177e4 3009
e9193d60 3010 sk_peek_offset_fwd(sk, chunk);
fc0d7536 3011
9f389e35
AC
3012 if (UNIXCB(skb).fp)
3013 break;
3014
e9193d60 3015 skip = 0;
9f389e35
AC
3016 last = skb;
3017 last_len = skb->len;
3018 unix_state_lock(sk);
3019 skb = skb_peek_next(skb, &sk->sk_receive_queue);
3020 if (skb)
3021 goto again;
3022 unix_state_unlock(sk);
1da177e4
LT
3023 break;
3024 }
3025 } while (size);
3026
6e1ce3c3 3027 mutex_unlock(&u->iolock);
9d797ee2 3028 if (state->msg)
a9c49cc2 3029 scm_recv_unix(sock, state->msg, &scm, flags);
2b514574
HFS
3030 else
3031 scm_destroy(&scm);
1da177e4
LT
3032out:
3033 return copied ? : err;
3034}
3035
2b514574
HFS
3036static int unix_stream_read_actor(struct sk_buff *skb,
3037 int skip, int chunk,
3038 struct unix_stream_read_state *state)
3039{
3040 int ret;
3041
3042 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
3043 state->msg, chunk);
3044 return ret ?: chunk;
3045}
3046
94531cfc
JW
3047int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
3048 size_t size, int flags)
3049{
3050 struct unix_stream_read_state state = {
3051 .recv_actor = unix_stream_read_actor,
3052 .socket = sk->sk_socket,
3053 .msg = msg,
3054 .size = size,
3055 .flags = flags
3056 };
3057
3058 return unix_stream_read_generic(&state, true);
3059}
3060
2b514574
HFS
3061static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
3062 size_t size, int flags)
3063{
3064 struct unix_stream_read_state state = {
3065 .recv_actor = unix_stream_read_actor,
3066 .socket = sock,
3067 .msg = msg,
3068 .size = size,
3069 .flags = flags
3070 };
3071
94531cfc
JW
3072#ifdef CONFIG_BPF_SYSCALL
3073 struct sock *sk = sock->sk;
3074 const struct proto *prot = READ_ONCE(sk->sk_prot);
3075
3076 if (prot != &unix_stream_proto)
ec095263 3077 return prot->recvmsg(sk, msg, size, flags, NULL);
94531cfc 3078#endif
06a77b07 3079 return unix_stream_read_generic(&state, true);
2b514574
HFS
3080}
3081
2b514574
HFS
3082static int unix_stream_splice_actor(struct sk_buff *skb,
3083 int skip, int chunk,
3084 struct unix_stream_read_state *state)
3085{
3086 return skb_splice_bits(skb, state->socket->sk,
3087 UNIXCB(skb).consumed + skip,
25869262 3088 state->pipe, chunk, state->splice_flags);
2b514574
HFS
3089}
3090
3091static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
3092 struct pipe_inode_info *pipe,
3093 size_t size, unsigned int flags)
3094{
3095 struct unix_stream_read_state state = {
3096 .recv_actor = unix_stream_splice_actor,
3097 .socket = sock,
3098 .pipe = pipe,
3099 .size = size,
3100 .splice_flags = flags,
3101 };
3102
3103 if (unlikely(*ppos))
3104 return -ESPIPE;
3105
3106 if (sock->file->f_flags & O_NONBLOCK ||
3107 flags & SPLICE_F_NONBLOCK)
3108 state.flags = MSG_DONTWAIT;
3109
06a77b07 3110 return unix_stream_read_generic(&state, false);
2b514574
HFS
3111}
3112
1da177e4
LT
3113static int unix_shutdown(struct socket *sock, int mode)
3114{
3115 struct sock *sk = sock->sk;
3116 struct sock *other;
3117
fc61b928
XW
3118 if (mode < SHUT_RD || mode > SHUT_RDWR)
3119 return -EINVAL;
3120 /* This maps:
3121 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
3122 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
3123 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
3124 */
3125 ++mode;
7180a031
AC
3126
3127 unix_state_lock(sk);
e1d09c2c 3128 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
7180a031
AC
3129 other = unix_peer(sk);
3130 if (other)
3131 sock_hold(other);
3132 unix_state_unlock(sk);
3133 sk->sk_state_change(sk);
3134
3135 if (other &&
3136 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
3137
3138 int peer_mode = 0;
94531cfc 3139 const struct proto *prot = READ_ONCE(other->sk_prot);
7180a031 3140
d359902d
JW
3141 if (prot->unhash)
3142 prot->unhash(other);
7180a031
AC
3143 if (mode&RCV_SHUTDOWN)
3144 peer_mode |= SEND_SHUTDOWN;
3145 if (mode&SEND_SHUTDOWN)
3146 peer_mode |= RCV_SHUTDOWN;
3147 unix_state_lock(other);
e1d09c2c 3148 WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
7180a031
AC
3149 unix_state_unlock(other);
3150 other->sk_state_change(other);
d0c6416b 3151 if (peer_mode == SHUTDOWN_MASK)
7180a031 3152 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
d0c6416b 3153 else if (peer_mode & RCV_SHUTDOWN)
7180a031 3154 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4 3155 }
7180a031
AC
3156 if (other)
3157 sock_put(other);
3158
1da177e4
LT
3159 return 0;
3160}
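/*
 * Illustrative userspace sketch (not part of this file): the SHUT_*
 * mapping handled above. Shutting down the write side makes the peer see
 * EOF (read() returning 0) while the other direction keeps working.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

void demo_shutdown(void)
{
	char buf[4];
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return;

	shutdown(sv[0], SHUT_WR);	/* peer now sees RCV_SHUTDOWN */

	if (read(sv[1], buf, sizeof(buf)) == 0)
		printf("peer saw EOF\n");
	if (write(sv[1], "ok", 2) == 2)	/* other direction still open */
		printf("reverse path still works\n");

	close(sv[0]);
	close(sv[1]);
}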
3161
885ee74d
PE
3162long unix_inq_len(struct sock *sk)
3163{
3164 struct sk_buff *skb;
3165 long amount = 0;
3166
3a0f38eb 3167 if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
885ee74d
PE
3168 return -EINVAL;
3169
3170 spin_lock(&sk->sk_receive_queue.lock);
3171 if (sk->sk_type == SOCK_STREAM ||
3172 sk->sk_type == SOCK_SEQPACKET) {
3173 skb_queue_walk(&sk->sk_receive_queue, skb)
e370a723 3174 amount += unix_skb_len(skb);
885ee74d
PE
3175 } else {
3176 skb = skb_peek(&sk->sk_receive_queue);
3177 if (skb)
3178 amount = skb->len;
3179 }
3180 spin_unlock(&sk->sk_receive_queue.lock);
3181
3182 return amount;
3183}
3184EXPORT_SYMBOL_GPL(unix_inq_len);
3185
3186long unix_outq_len(struct sock *sk)
3187{
3188 return sk_wmem_alloc_get(sk);
3189}
3190EXPORT_SYMBOL_GPL(unix_outq_len);
3191
ba94f308
AV
3192static int unix_open_file(struct sock *sk)
3193{
3194 struct path path;
3195 struct file *f;
3196 int fd;
3197
3198 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3199 return -EPERM;
3200
ae3b5641
AV
3201 if (!smp_load_acquire(&unix_sk(sk)->addr))
3202 return -ENOENT;
3203
ba94f308 3204 path = unix_sk(sk)->path;
ae3b5641 3205 if (!path.dentry)
ba94f308 3206 return -ENOENT;
ba94f308
AV
3207
3208 path_get(&path);
ba94f308
AV
3209
3210 fd = get_unused_fd_flags(O_CLOEXEC);
3211 if (fd < 0)
3212 goto out;
3213
3214 f = dentry_open(&path, O_PATH, current_cred());
3215 if (IS_ERR(f)) {
3216 put_unused_fd(fd);
3217 fd = PTR_ERR(f);
3218 goto out;
3219 }
3220
3221 fd_install(fd, f);
3222out:
3223 path_put(&path);
3224
3225 return fd;
3226}
3227
1da177e4
LT
3228static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3229{
3230 struct sock *sk = sock->sk;
e27dfcea 3231 long amount = 0;
1da177e4
LT
3232 int err;
3233
6eba6a37
ED
3234 switch (cmd) {
3235 case SIOCOUTQ:
885ee74d 3236 amount = unix_outq_len(sk);
6eba6a37
ED
3237 err = put_user(amount, (int __user *)arg);
3238 break;
3239 case SIOCINQ:
885ee74d
PE
3240 amount = unix_inq_len(sk);
3241 if (amount < 0)
3242 err = amount;
3243 else
1da177e4 3244 err = put_user(amount, (int __user *)arg);
885ee74d 3245 break;
ba94f308
AV
3246 case SIOCUNIXFILE:
3247 err = unix_open_file(sk);
3248 break;
314001f0
RS
3249#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3250 case SIOCATMARK:
3251 {
e400cfa3 3252 struct unix_sock *u = unix_sk(sk);
314001f0 3253 struct sk_buff *skb;
314001f0
RS
3254 int answ = 0;
3255
e400cfa3
KI
3256 mutex_lock(&u->iolock);
3257
314001f0 3258 skb = skb_peek(&sk->sk_receive_queue);
e400cfa3
KI
3259 if (skb) {
3260 struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
5aa57d9f
KI
3261 struct sk_buff *next_skb;
3262
3263 next_skb = skb_peek_next(skb, &sk->sk_receive_queue);
e400cfa3
KI
3264
3265 if (skb == oob_skb ||
5aa57d9f
KI
3266 (!unix_skb_len(skb) &&
3267 (!oob_skb || next_skb == oob_skb)))
e400cfa3
KI
3268 answ = 1;
3269 }
3270
3271 mutex_unlock(&u->iolock);
3272
314001f0
RS
3273 err = put_user(answ, (int __user *)arg);
3274 }
3275 break;
3276#endif
6eba6a37
ED
3277 default:
3278 err = -ENOIOCTLCMD;
3279 break;
1da177e4
LT
3280 }
3281 return err;
3282}
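/*
 * Illustrative userspace sketch (not part of this file): the SIOCINQ and
 * SIOCOUTQ queries served above. SIOCINQ reports unread bytes on the
 * receive queue, SIOCOUTQ the bytes still charged to the send side.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/sockios.h>

void demo_queue_sizes(void)
{
	int sv[2], inq = 0, outq = 0;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return;

	write(sv[0], "hello", 5);

	if (ioctl(sv[1], SIOCINQ, &inq) == 0 &&
	    ioctl(sv[0], SIOCOUTQ, &outq) == 0)
		printf("inq=%d outq=%d\n", inq, outq);

	close(sv[0]);
	close(sv[1]);
}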
3283
5f6beb9e
AB
3284#ifdef CONFIG_COMPAT
3285static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3286{
3287 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3288}
3289#endif
3290
a11e1d43 3291static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
3292{
3293 struct sock *sk = sock->sk;
eb0718fb 3294 unsigned char state;
a11e1d43 3295 __poll_t mask;
e1d09c2c 3296 u8 shutdown;
a11e1d43 3297
89ab066d 3298 sock_poll_wait(file, sock, wait);
a11e1d43 3299 mask = 0;
e1d09c2c 3300 shutdown = READ_ONCE(sk->sk_shutdown);
eb0718fb 3301 state = READ_ONCE(sk->sk_state);
1da177e4
LT
3302
3303 /* exceptional events? */
cc04410a 3304 if (READ_ONCE(sk->sk_err))
a9a08845 3305 mask |= EPOLLERR;
e1d09c2c 3306 if (shutdown == SHUTDOWN_MASK)
a9a08845 3307 mask |= EPOLLHUP;
e1d09c2c 3308 if (shutdown & RCV_SHUTDOWN)
a9a08845 3309 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
1da177e4
LT
3310
3311 /* readable? */
3ef7cf57 3312 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
a9a08845 3313 mask |= EPOLLIN | EPOLLRDNORM;
af493388
CW
3314 if (sk_is_readable(sk))
3315 mask |= EPOLLIN | EPOLLRDNORM;
d9a232d4
KI
3316#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3317 if (READ_ONCE(unix_sk(sk)->oob_skb))
3318 mask |= EPOLLPRI;
3319#endif
1da177e4
LT
3320
3321 /* Connection-based need to check for termination and startup */
6eba6a37 3322 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
eb0718fb 3323 state == TCP_CLOSE)
a9a08845 3324 mask |= EPOLLHUP;
1da177e4
LT
3325
3326 /*
3327 * we set writable also when the other side has shut down the
3328 * connection. This prevents stuck sockets.
3329 */
eb0718fb 3330 if (unix_writable(sk, state))
a9a08845 3331 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
1da177e4
LT
3332
3333 return mask;
3334}
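/*
 * Illustrative userspace sketch (not part of this file): how the mask
 * computed above surfaces to poll(2). A pending OOB byte shows up as
 * POLLPRI (with CONFIG_AF_UNIX_OOB); ordinary data as POLLIN.
 */
#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

void demo_poll_pri(void)
{
	struct pollfd pfd;
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return;

	if (send(sv[0], "x", 1, MSG_OOB) < 0)
		perror("send MSG_OOB");

	pfd.fd = sv[1];
	pfd.events = POLLIN | POLLPRI;
	if (poll(&pfd, 1, 0) > 0 && (pfd.revents & POLLPRI))
		printf("urgent data pending\n");

	close(sv[0]);
	close(sv[1]);
}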
3335
a11e1d43
LT
3336static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3337 poll_table *wait)
3c73419c 3338{
ec0d215f 3339 struct sock *sk = sock->sk, *other;
a11e1d43 3340 unsigned int writable;
eb0718fb 3341 unsigned char state;
a11e1d43 3342 __poll_t mask;
e1d09c2c 3343 u8 shutdown;
a11e1d43 3344
89ab066d 3345 sock_poll_wait(file, sock, wait);
a11e1d43 3346 mask = 0;
e1d09c2c 3347 shutdown = READ_ONCE(sk->sk_shutdown);
eb0718fb 3348 state = READ_ONCE(sk->sk_state);
3c73419c
RW
3349
3350 /* exceptional events? */
cc04410a
ED
3351 if (READ_ONCE(sk->sk_err) ||
3352 !skb_queue_empty_lockless(&sk->sk_error_queue))
a9a08845
LT
3353 mask |= EPOLLERR |
3354 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
7d4c04fc 3355
e1d09c2c 3356 if (shutdown & RCV_SHUTDOWN)
a9a08845 3357 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
e1d09c2c 3358 if (shutdown == SHUTDOWN_MASK)
a9a08845 3359 mask |= EPOLLHUP;
3c73419c
RW
3360
3361 /* readable? */
3ef7cf57 3362 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
a9a08845 3363 mask |= EPOLLIN | EPOLLRDNORM;
af493388
CW
3364 if (sk_is_readable(sk))
3365 mask |= EPOLLIN | EPOLLRDNORM;
3c73419c
RW
3366
3367 /* Connection-based need to check for termination and startup */
eb0718fb
KI
3368 if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
3369 mask |= EPOLLHUP;
3c73419c 3370
973a34aa 3371 /* No write status requested, avoid expensive OUT tests. */
a11e1d43 3372 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
973a34aa
ED
3373 return mask;
3374
eb0718fb 3375 writable = unix_writable(sk, state);
7d267278
RW
3376 if (writable) {
3377 unix_state_lock(sk);
3378
3379 other = unix_peer(sk);
3380 if (other && unix_peer(other) != sk &&
04f08eb4 3381 unix_recvq_full_lockless(other) &&
7d267278
RW
3382 unix_dgram_peer_wake_me(sk, other))
3383 writable = 0;
3384
3385 unix_state_unlock(sk);
ec0d215f
RW
3386 }
3387
3388 if (writable)
a9a08845 3389 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3c73419c 3390 else
9cd3e072 3391 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3c73419c 3392
3c73419c
RW
3393 return mask;
3394}
1da177e4
LT
3395
3396#ifdef CONFIG_PROC_FS
a53eb3fe 3397
7123aaa3
ED
3398#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3399
3400#define get_bucket(x) ((x) >> BUCKET_SPACE)
afd20b92 3401#define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
7123aaa3 3402#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
a53eb3fe 3403
7123aaa3 3404static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 3405{
7123aaa3
ED
3406 unsigned long offset = get_offset(*pos);
3407 unsigned long bucket = get_bucket(*pos);
7123aaa3 3408 unsigned long count = 0;
cf2f225e 3409 struct sock *sk;
1da177e4 3410
cf2f225e
KI
3411 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3412 sk; sk = sk_next(sk)) {
7123aaa3
ED
3413 if (++count == offset)
3414 break;
3415 }
3416
3417 return sk;
3418}
3419
4408d55a 3420static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
7123aaa3 3421{
afd20b92 3422 unsigned long bucket = get_bucket(*pos);
79b05bea 3423 struct net *net = seq_file_net(seq);
4408d55a 3424 struct sock *sk;
7123aaa3 3425
f302d180 3426 while (bucket < UNIX_HASH_SIZE) {
79b05bea 3427 spin_lock(&net->unx.table.locks[bucket]);
4408d55a 3428
7123aaa3
ED
3429 sk = unix_from_bucket(seq, pos);
3430 if (sk)
3431 return sk;
3432
79b05bea 3433 spin_unlock(&net->unx.table.locks[bucket]);
4408d55a
KI
3434
3435 *pos = set_bucket_offset(++bucket, 1);
3436 }
7123aaa3 3437
1da177e4
LT
3438 return NULL;
3439}
3440
4408d55a
KI
3441static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3442 loff_t *pos)
3443{
3444 unsigned long bucket = get_bucket(*pos);
3445
cf2f225e
KI
3446 sk = sk_next(sk);
3447 if (sk)
3448 return sk;
3449
4408d55a 3450
cf2f225e 3451 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
4408d55a
KI
3452
3453 *pos = set_bucket_offset(++bucket, 1);
3454
3455 return unix_get_first(seq, pos);
3456}
3457
1da177e4
LT
3458static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3459{
7123aaa3
ED
3460 if (!*pos)
3461 return SEQ_START_TOKEN;
3462
4408d55a 3463 return unix_get_first(seq, pos);
1da177e4
LT
3464}
3465
3466static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3467{
3468 ++*pos;
4408d55a
KI
3469
3470 if (v == SEQ_START_TOKEN)
3471 return unix_get_first(seq, pos);
3472
3473 return unix_get_next(seq, v, pos);
1da177e4
LT
3474}
3475
3476static void unix_seq_stop(struct seq_file *seq, void *v)
3477{
afd20b92
KI
3478 struct sock *sk = v;
3479
2f7ca90a 3480 if (sk)
79b05bea 3481 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
1da177e4
LT
3482}
3483
3484static int unix_seq_show(struct seq_file *seq, void *v)
3485{
ac7bfa62 3486
b9f3124f 3487 if (v == SEQ_START_TOKEN)
1da177e4
LT
3488 seq_puts(seq, "Num RefCount Protocol Flags Type St "
3489 "Inode Path\n");
3490 else {
3491 struct sock *s = v;
3492 struct unix_sock *u = unix_sk(s);
1c92b4e5 3493 unix_state_lock(s);
1da177e4 3494
71338aa7 3495 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
1da177e4 3496 s,
41c6d650 3497 refcount_read(&s->sk_refcnt),
1da177e4
LT
3498 0,
3499 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3500 s->sk_type,
3501 s->sk_socket ?
3502 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3503 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3504 sock_i_ino(s));
3505
2f7ca90a 3506 if (u->addr) { // under a hash table lock here
1da177e4
LT
3507 int i, len;
3508 seq_putc(seq, ' ');
3509
3510 i = 0;
755662ce
KI
3511 len = u->addr->len -
3512 offsetof(struct sockaddr_un, sun_path);
5ce7ab49 3513 if (u->addr->name->sun_path[0]) {
1da177e4 3514 len--;
5ce7ab49 3515 } else {
1da177e4
LT
3516 seq_putc(seq, '@');
3517 i++;
3518 }
3519 for ( ; i < len; i++)
e7947ea7
IB
3520 seq_putc(seq, u->addr->name->sun_path[i] ?:
3521 '@');
1da177e4 3522 }
1c92b4e5 3523 unix_state_unlock(s);
1da177e4
LT
3524 seq_putc(seq, '\n');
3525 }
3526
3527 return 0;
3528}
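/*
 * Illustrative userspace sketch (not part of this file): consuming the
 * /proc/net/unix table emitted by unix_seq_show() above. Each row is
 * "Num RefCount Protocol Flags Type St Inode Path", with abstract names
 * rendered with a leading '@'.
 */
#include <stdio.h>

void demo_dump_proc_net_unix(void)
{
	char line[512];
	FILE *f = fopen("/proc/net/unix", "r");

	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}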
3529
56b3d975 3530static const struct seq_operations unix_seq_ops = {
1da177e4
LT
3531 .start = unix_seq_start,
3532 .next = unix_seq_next,
3533 .stop = unix_seq_stop,
3534 .show = unix_seq_show,
3535};
2c860a43 3536
3a04927f 3537#ifdef CONFIG_BPF_SYSCALL
855d8e77
KI
3538struct bpf_unix_iter_state {
3539 struct seq_net_private p;
3540 unsigned int cur_sk;
3541 unsigned int end_sk;
3542 unsigned int max_sk;
3543 struct sock **batch;
3544 bool st_bucket_done;
3545};
3546
2c860a43
KI
3547struct bpf_iter__unix {
3548 __bpf_md_ptr(struct bpf_iter_meta *, meta);
3549 __bpf_md_ptr(struct unix_sock *, unix_sk);
3550 uid_t uid __aligned(8);
3551};
3552
3553static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3554 struct unix_sock *unix_sk, uid_t uid)
3555{
3556 struct bpf_iter__unix ctx;
3557
3558 meta->seq_num--; /* skip SEQ_START_TOKEN */
3559 ctx.meta = meta;
3560 ctx.unix_sk = unix_sk;
3561 ctx.uid = uid;
3562 return bpf_iter_run_prog(prog, &ctx);
3563}
3564
855d8e77
KI
3565static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3566
3567{
3568 struct bpf_unix_iter_state *iter = seq->private;
3569 unsigned int expected = 1;
3570 struct sock *sk;
3571
3572 sock_hold(start_sk);
3573 iter->batch[iter->end_sk++] = start_sk;
3574
3575 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
855d8e77
KI
3576 if (iter->end_sk < iter->max_sk) {
3577 sock_hold(sk);
3578 iter->batch[iter->end_sk++] = sk;
3579 }
3580
3581 expected++;
3582 }
3583
cf2f225e 3584 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
855d8e77
KI
3585
3586 return expected;
3587}
3588
3589static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3590{
3591 while (iter->cur_sk < iter->end_sk)
3592 sock_put(iter->batch[iter->cur_sk++]);
3593}
3594
3595static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3596 unsigned int new_batch_sz)
3597{
3598 struct sock **new_batch;
3599
3600 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3601 GFP_USER | __GFP_NOWARN);
3602 if (!new_batch)
3603 return -ENOMEM;
3604
3605 bpf_iter_unix_put_batch(iter);
3606 kvfree(iter->batch);
3607 iter->batch = new_batch;
3608 iter->max_sk = new_batch_sz;
3609
3610 return 0;
3611}
static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
					loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected;
	bool resized = false;
	struct sock *sk;

	if (iter->st_bucket_done)
		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);

again:
	/* Get a new batch */
	iter->cur_sk = 0;
	iter->end_sk = 0;

	sk = unix_get_first(seq, pos);
	if (!sk)
		return NULL; /* Done */

	expected = bpf_iter_unix_hold_batch(seq, sk);

	if (iter->end_sk == expected) {
		iter->st_bucket_done = true;
		return sk;
	}

	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
		resized = true;
		goto again;
	}

	return sk;
}

static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;

	/* bpf iter does not support lseek, so it always
	 * continues from where it was stop()-ped.
	 */
	return bpf_iter_unix_batch(seq, pos);
}

static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct sock *sk;

	/* Whenever seq_next() is called, the socket at iter->cur_sk is
	 * done with seq_show(), so release it and advance to the next
	 * sk in the batch.
	 */
	if (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);

	++*pos;

	if (iter->cur_sk < iter->end_sk)
		sk = iter->batch[iter->cur_sk];
	else
		sk = bpf_iter_unix_batch(seq, pos);

	return sk;
}
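
/*
 * Show one socket: pin it with lock_sock_fast() and skip it if it was
 * unhashed after being batched, then pass it to the BPF program along
 * with the (namespace-munged) owner uid.
 */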
static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	struct sock *sk = v;
	uid_t uid;
	bool slow;
	int ret;

	if (v == SEQ_START_TOKEN)
		return 0;

	slow = lock_sock_fast(sk);

	if (unlikely(sk_unhashed(sk))) {
		ret = SEQ_SKIP;
		goto unlock;
	}

	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, false);
	ret = unix_prog_seq_show(prog, &meta, v, uid);
unlock:
	unlock_sock_fast(sk, slow);
	return ret;
}
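
/*
 * stop() is called with v == NULL once iteration has completed, in
 * which case the BPF program gets a final call to observe the end of
 * the dump.  Either way, drop any references still held on the
 * unconsumed part of the batch.
 */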
static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	if (!v) {
		meta.seq = seq;
		prog = bpf_iter_get_info(&meta, true);
		if (prog)
			(void)unix_prog_seq_show(prog, &meta, v, 0);
	}

	if (iter->cur_sk < iter->end_sk)
		bpf_iter_unix_put_batch(iter);
}

static const struct seq_operations bpf_iter_unix_seq_ops = {
	.start	= bpf_iter_unix_seq_start,
	.next	= bpf_iter_unix_seq_next,
	.stop	= bpf_iter_unix_seq_stop,
	.show	= bpf_iter_unix_seq_show,
};
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_PROC_FS */

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
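
/*
 * Per-netns setup: the max_dgram_qlen sysctl, the /proc/net/unix seq
 * file, and this namespace's own hash table of UNIX_HASH_SIZE buckets,
 * each protected by its own spinlock.
 */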
static int __net_init unix_net_init(struct net *net)
{
	int i;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private)))
		goto err_sysctl;
#endif

	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
					      sizeof(spinlock_t), GFP_KERNEL);
	if (!net->unx.table.locks)
		goto err_proc;

	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
						sizeof(struct hlist_head),
						GFP_KERNEL);
	if (!net->unx.table.buckets)
		goto free_locks;

	for (i = 0; i < UNIX_HASH_SIZE; i++) {
		spin_lock_init(&net->unx.table.locks[i]);
		lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL);
		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
	}

	return 0;

free_locks:
	kvfree(net->unx.table.locks);
err_proc:
#ifdef CONFIG_PROC_FS
	remove_proc_entry("unix", net->proc_net);
err_sysctl:
#endif
	unix_sysctl_unregister(net);
out:
	return -ENOMEM;
}

static void __net_exit unix_net_exit(struct net *net)
{
	kvfree(net->unx.table.buckets);
	kvfree(net->unx.table.locks);
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
		     struct unix_sock *unix_sk, uid_t uid)

#define INIT_BATCH_SZ 16
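
/*
 * Allocate the iterator's private state: the seq-net private data plus
 * an initial INIT_BATCH_SZ-entry batch array, grown on demand later by
 * bpf_iter_unix_realloc_batch().
 */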
static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_unix_iter_state *iter = priv_data;
	int err;

	err = bpf_iter_init_seq_net(priv_data, aux);
	if (err)
		return err;

	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
	if (err) {
		bpf_iter_fini_seq_net(priv_data);
		return err;
	}

	return 0;
}

static void bpf_iter_fini_unix(void *priv_data)
{
	struct bpf_unix_iter_state *iter = priv_data;

	bpf_iter_fini_seq_net(priv_data);
	kvfree(iter->batch);
}

static const struct bpf_iter_seq_info unix_seq_info = {
	.seq_ops		= &bpf_iter_unix_seq_ops,
	.init_seq_private	= bpf_iter_init_unix,
	.fini_seq_private	= bpf_iter_fini_unix,
	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
};
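
/*
 * Let the iterator program call bpf_setsockopt()/bpf_getsockopt() on
 * the sockets it visits.
 */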
static const struct bpf_func_proto *
bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
			     const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_setsockopt:
		return &bpf_sk_setsockopt_proto;
	case BPF_FUNC_getsockopt:
		return &bpf_sk_getsockopt_proto;
	default:
		return NULL;
	}
}
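
/*
 * Registration data for the "unix" iterator target; the BTF id of
 * struct unix_sock is resolved at boot in bpf_iter_register().
 */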
static struct bpf_iter_reg unix_reg_info = {
	.target			= "unix",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__unix, unix_sk),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.get_func_proto		= bpf_iter_unix_get_func_proto,
	.seq_info		= &unix_seq_info,
};

static void __init bpf_iter_register(void)
{
	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
	if (bpf_iter_reg_target(&unix_reg_info))
		pr_warn("Warning: could not register bpf iterator unix\n");
}
3870#endif
static int __init af_unix_init(void)
{
	int i, rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));

	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
		spin_lock_init(&bsd_socket_locks[i]);
		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
	}

	rc = proto_register(&unix_dgram_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	rc = proto_register(&unix_stream_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		proto_unregister(&unix_dgram_proto);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
	unix_bpf_build_proto();

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	bpf_iter_register();
#endif

out:
	return rc;
}

/* Later than subsys_initcall() because we depend on stuff initialised there */
fs_initcall(af_unix_init);