]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/unix/af_unix.c
missing barriers in some of unix_sock ->addr and ->path accesses
[thirdparty/kernel/stable.git] / net / unix / af_unix.c
CommitLineData
1da177e4
LT
1/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
113aa838 4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
1da177e4
LT
11 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
 *		Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
 *		Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
80 * with BSD names.
81 */
82
5cc208be 83#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84
1da177e4 85#include <linux/module.h>
1da177e4 86#include <linux/kernel.h>
1da177e4 87#include <linux/signal.h>
3f07c014 88#include <linux/sched/signal.h>
1da177e4
LT
89#include <linux/errno.h>
90#include <linux/string.h>
91#include <linux/stat.h>
92#include <linux/dcache.h>
93#include <linux/namei.h>
94#include <linux/socket.h>
95#include <linux/un.h>
96#include <linux/fcntl.h>
97#include <linux/termios.h>
98#include <linux/sockios.h>
99#include <linux/net.h>
100#include <linux/in.h>
101#include <linux/fs.h>
102#include <linux/slab.h>
7c0f6ba6 103#include <linux/uaccess.h>
1da177e4
LT
104#include <linux/skbuff.h>
105#include <linux/netdevice.h>
457c4cbc 106#include <net/net_namespace.h>
1da177e4 107#include <net/sock.h>
c752f073 108#include <net/tcp_states.h>
1da177e4
LT
109#include <net/af_unix.h>
110#include <linux/proc_fs.h>
111#include <linux/seq_file.h>
112#include <net/scm.h>
113#include <linux/init.h>
114#include <linux/poll.h>
1da177e4
LT
115#include <linux/rtnetlink.h>
116#include <linux/mount.h>
117#include <net/checksum.h>
118#include <linux/security.h>
2b15af6f 119#include <linux/freezer.h>
ba94f308 120#include <linux/file.h>
1da177e4 121
7123aaa3 122struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
fa7ff56f
PE
123EXPORT_SYMBOL_GPL(unix_socket_table);
124DEFINE_SPINLOCK(unix_table_lock);
125EXPORT_SYMBOL_GPL(unix_table_lock);
518de9b3 126static atomic_long_t unix_nr_socks;
1da177e4 127
1da177e4 128
7123aaa3
ED
129static struct hlist_head *unix_sockets_unbound(void *addr)
130{
131 unsigned long hash = (unsigned long)addr;
132
133 hash ^= hash >> 16;
134 hash ^= hash >> 8;
135 hash %= UNIX_HASH_SIZE;
136 return &unix_socket_table[UNIX_HASH_SIZE + hash];
137}
138
/*
 * True when the hash stored in the bound address of @sk is below
 * UNIX_HASH_SIZE.  Dereferences ->addr unconditionally, so @sk must
 * already have an address.
 */
#define UNIX_ABSTRACT(sk) \
	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
1da177e4 140
877ce7c1 141#ifdef CONFIG_SECURITY_NETWORK
dc49c1f9 142static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1 143{
37a9a8df 144 UNIXCB(skb).secid = scm->secid;
877ce7c1
CZ
145}
146
147static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
148{
37a9a8df
SS
149 scm->secid = UNIXCB(skb).secid;
150}
151
152static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
153{
154 return (scm->secid == UNIXCB(skb).secid);
877ce7c1
CZ
155}
156#else
dc49c1f9 157static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1
CZ
158{ }
159
160static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
161{ }
37a9a8df
SS
162
163static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
164{
165 return true;
166}
877ce7c1
CZ
167#endif /* CONFIG_SECURITY_NETWORK */
168
1da177e4
LT
169/*
170 * SMP locking strategy:
fbe9cc4a 171 * hash table is protected with spinlock unix_table_lock
663717f6 172 * each socket state is protected by separate spin lock.
1da177e4
LT
173 */
174
95c96174 175static inline unsigned int unix_hash_fold(__wsum n)
1da177e4 176{
0a13404d 177 unsigned int hash = (__force unsigned int)csum_fold(n);
95c96174 178
1da177e4
LT
179 hash ^= hash>>8;
180 return hash&(UNIX_HASH_SIZE-1);
181}
182
/*
 * Peer pointer of a unix socket.  Kept as a macro because it is also used
 * as an lvalue (unix_release_sock() assigns NULL through it).
 */
#define unix_peer(sk) \
	(unix_sk(sk)->peer)
184
/* Non-zero when @osk has @sk recorded as its peer. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
189
/*
 * Non-zero when @sk may send to @osk: either @osk has no peer at all,
 * or its peer is @sk.
 */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return !unix_peer(osk) || unix_our_peer(sk, osk);
}
194
3c73419c
RW
195static inline int unix_recvq_full(struct sock const *sk)
196{
197 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
198}
199
fa7ff56f 200struct sock *unix_peer_get(struct sock *s)
1da177e4
LT
201{
202 struct sock *peer;
203
1c92b4e5 204 unix_state_lock(s);
1da177e4
LT
205 peer = unix_peer(s);
206 if (peer)
207 sock_hold(peer);
1c92b4e5 208 unix_state_unlock(s);
1da177e4
LT
209 return peer;
210}
fa7ff56f 211EXPORT_SYMBOL_GPL(unix_peer_get);
1da177e4
LT
212
213static inline void unix_release_addr(struct unix_address *addr)
214{
8c9814b9 215 if (refcount_dec_and_test(&addr->refcnt))
1da177e4
LT
216 kfree(addr);
217}
218
219/*
220 * Check unix socket name:
221 * - should be not zero length.
222 * - if started by not zero, should be NULL terminated (FS object)
223 * - if started by zero, it is abstract name.
224 */
ac7bfa62 225
95c96174 226static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
1da177e4 227{
33c4368e
KK
228 *hashp = 0;
229
1da177e4
LT
230 if (len <= sizeof(short) || len > sizeof(*sunaddr))
231 return -EINVAL;
232 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
233 return -EINVAL;
234 if (sunaddr->sun_path[0]) {
235 /*
236 * This may look like an off by one error but it is a bit more
237 * subtle. 108 is the longest valid AF_UNIX path for a binding.
25985edc 238 * sun_path[108] doesn't as such exist. However in kernel space
1da177e4
LT
239 * we are guaranteed that it is a valid memory location in our
240 * kernel address buffer.
241 */
e27dfcea 242 ((char *)sunaddr)[len] = 0;
1da177e4
LT
243 len = strlen(sunaddr->sun_path)+1+sizeof(short);
244 return len;
245 }
246
07f0757a 247 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
1da177e4
LT
248 return len;
249}
250
/*
 * Unhash @sk without taking unix_table_lock; unix_remove_socket() is the
 * variant that takes the lock itself.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
255
/*
 * Add @sk to the hash chain @list without taking unix_table_lock.
 * Warns if @sk is still hashed somewhere.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}
261
262static inline void unix_remove_socket(struct sock *sk)
263{
fbe9cc4a 264 spin_lock(&unix_table_lock);
1da177e4 265 __unix_remove_socket(sk);
fbe9cc4a 266 spin_unlock(&unix_table_lock);
1da177e4
LT
267}
268
269static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
270{
fbe9cc4a 271 spin_lock(&unix_table_lock);
1da177e4 272 __unix_insert_socket(list, sk);
fbe9cc4a 273 spin_unlock(&unix_table_lock);
1da177e4
LT
274}
275
097e66c5
DL
276static struct sock *__unix_find_socket_byname(struct net *net,
277 struct sockaddr_un *sunname,
95c96174 278 int len, int type, unsigned int hash)
1da177e4
LT
279{
280 struct sock *s;
1da177e4 281
b67bfe0d 282 sk_for_each(s, &unix_socket_table[hash ^ type]) {
1da177e4
LT
283 struct unix_sock *u = unix_sk(s);
284
878628fb 285 if (!net_eq(sock_net(s), net))
097e66c5
DL
286 continue;
287
1da177e4
LT
288 if (u->addr->len == len &&
289 !memcmp(u->addr->name, sunname, len))
290 goto found;
291 }
292 s = NULL;
293found:
294 return s;
295}
296
097e66c5
DL
297static inline struct sock *unix_find_socket_byname(struct net *net,
298 struct sockaddr_un *sunname,
1da177e4 299 int len, int type,
95c96174 300 unsigned int hash)
1da177e4
LT
301{
302 struct sock *s;
303
fbe9cc4a 304 spin_lock(&unix_table_lock);
097e66c5 305 s = __unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
306 if (s)
307 sock_hold(s);
fbe9cc4a 308 spin_unlock(&unix_table_lock);
1da177e4
LT
309 return s;
310}
311
6616f788 312static struct sock *unix_find_socket_byinode(struct inode *i)
1da177e4
LT
313{
314 struct sock *s;
1da177e4 315
fbe9cc4a 316 spin_lock(&unix_table_lock);
b67bfe0d 317 sk_for_each(s,
1da177e4 318 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
40ffe67d 319 struct dentry *dentry = unix_sk(s)->path.dentry;
1da177e4 320
beef5121 321 if (dentry && d_backing_inode(dentry) == i) {
1da177e4
LT
322 sock_hold(s);
323 goto found;
324 }
325 }
326 s = NULL;
327found:
fbe9cc4a 328 spin_unlock(&unix_table_lock);
1da177e4
LT
329 return s;
330}
331
7d267278
RW
332/* Support code for asymmetrically connected dgram sockets
333 *
334 * If a datagram socket is connected to a socket not itself connected
335 * to the first socket (eg, /dev/log), clients may only enqueue more
336 * messages if the present receive queue of the server socket is not
337 * "too large". This means there's a second writeability condition
338 * poll and sendmsg need to test. The dgram recv code will do a wake
339 * up on the peer_wait wait queue of a socket upon reception of a
340 * datagram which needs to be propagated to sleeping would-be writers
341 * since these might not have sent anything so far. This can't be
342 * accomplished via poll_wait because the lifetime of the server
343 * socket might be less than that of its clients if these break their
344 * association with it or if the server socket is closed while clients
345 * are still connected to it and there's no way to inform "a polling
346 * implementation" that it should let go of a certain wait queue
347 *
ac6424b9 348 * In order to propagate a wake up, a wait_queue_entry_t of the client
7d267278
RW
349 * socket is enqueued on the peer_wait queue of the server socket
350 * whose wake function does a wake_up on the ordinary client socket
351 * wait queue. This connection is established whenever a write (or
352 * poll for write) hit the flow control condition and broken when the
353 * association to the server socket is dissolved or after a wake up
354 * was relayed.
355 */
356
ac6424b9 357static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
7d267278
RW
358 void *key)
359{
360 struct unix_sock *u;
361 wait_queue_head_t *u_sleep;
362
363 u = container_of(q, struct unix_sock, peer_wake);
364
365 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
366 q);
367 u->peer_wake.private = NULL;
368
369 /* relaying can only happen while the wq still exists */
370 u_sleep = sk_sleep(&u->sk);
371 if (u_sleep)
3ad6f93e 372 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
7d267278
RW
373
374 return 0;
375}
376
377static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
378{
379 struct unix_sock *u, *u_other;
380 int rc;
381
382 u = unix_sk(sk);
383 u_other = unix_sk(other);
384 rc = 0;
385 spin_lock(&u_other->peer_wait.lock);
386
387 if (!u->peer_wake.private) {
388 u->peer_wake.private = other;
389 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
390
391 rc = 1;
392 }
393
394 spin_unlock(&u_other->peer_wait.lock);
395 return rc;
396}
397
398static void unix_dgram_peer_wake_disconnect(struct sock *sk,
399 struct sock *other)
400{
401 struct unix_sock *u, *u_other;
402
403 u = unix_sk(sk);
404 u_other = unix_sk(other);
405 spin_lock(&u_other->peer_wait.lock);
406
407 if (u->peer_wake.private == other) {
408 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
409 u->peer_wake.private = NULL;
410 }
411
412 spin_unlock(&u_other->peer_wait.lock);
413}
414
415static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
416 struct sock *other)
417{
418 unix_dgram_peer_wake_disconnect(sk, other);
419 wake_up_interruptible_poll(sk_sleep(sk),
a9a08845
LT
420 EPOLLOUT |
421 EPOLLWRNORM |
422 EPOLLWRBAND);
7d267278
RW
423}
424
425/* preconditions:
426 * - unix_peer(sk) == other
427 * - association is stable
428 */
429static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
430{
431 int connected;
432
433 connected = unix_dgram_peer_wake_connect(sk, other);
434
51f7e951
JB
435 /* If other is SOCK_DEAD, we want to make sure we signal
436 * POLLOUT, such that a subsequent write() can get a
437 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
438 * to other and its full, we will hang waiting for POLLOUT.
439 */
440 if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
7d267278
RW
441 return 1;
442
443 if (connected)
444 unix_dgram_peer_wake_disconnect(sk, other);
445
446 return 0;
447}
448
1586a587 449static int unix_writable(const struct sock *sk)
1da177e4 450{
1586a587 451 return sk->sk_state != TCP_LISTEN &&
14afee4b 452 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
1da177e4
LT
453}
454
455static void unix_write_space(struct sock *sk)
456{
43815482
ED
457 struct socket_wq *wq;
458
459 rcu_read_lock();
1da177e4 460 if (unix_writable(sk)) {
43815482 461 wq = rcu_dereference(sk->sk_wq);
1ce0bf50 462 if (skwq_has_sleeper(wq))
67426b75 463 wake_up_interruptible_sync_poll(&wq->wait,
a9a08845 464 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
8d8ad9d7 465 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4 466 }
43815482 467 rcu_read_unlock();
1da177e4
LT
468}
469
470/* When dgram socket disconnects (or changes its peer), we clear its receive
471 * queue of packets arrived from previous peer. First, it allows to do
472 * flow control based only on wmem_alloc; second, sk connected to peer
473 * may receive messages only from that peer. */
474static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
475{
b03efcfb 476 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
477 skb_queue_purge(&sk->sk_receive_queue);
478 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
479
480 /* If one link of bidirectional dgram pipe is disconnected,
481 * we signal error. Messages are lost. Do not make this,
482 * when peer was not connected to us.
483 */
484 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
485 other->sk_err = ECONNRESET;
486 other->sk_error_report(other);
487 }
488 }
489}
490
491static void unix_sock_destructor(struct sock *sk)
492{
493 struct unix_sock *u = unix_sk(sk);
494
495 skb_queue_purge(&sk->sk_receive_queue);
496
14afee4b 497 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
547b792c
IJ
498 WARN_ON(!sk_unhashed(sk));
499 WARN_ON(sk->sk_socket);
1da177e4 500 if (!sock_flag(sk, SOCK_DEAD)) {
5cc208be 501 pr_info("Attempt to release alive unix socket: %p\n", sk);
1da177e4
LT
502 return;
503 }
504
505 if (u->addr)
506 unix_release_addr(u->addr);
507
518de9b3 508 atomic_long_dec(&unix_nr_socks);
6f756a8c 509 local_bh_disable();
a8076d8d 510 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
6f756a8c 511 local_bh_enable();
1da177e4 512#ifdef UNIX_REFCNT_DEBUG
5cc208be 513 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
518de9b3 514 atomic_long_read(&unix_nr_socks));
1da177e4
LT
515#endif
516}
517
ded34e0f 518static void unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
519{
520 struct unix_sock *u = unix_sk(sk);
40ffe67d 521 struct path path;
1da177e4
LT
522 struct sock *skpair;
523 struct sk_buff *skb;
524 int state;
525
526 unix_remove_socket(sk);
527
528 /* Clear state */
1c92b4e5 529 unix_state_lock(sk);
1da177e4
LT
530 sock_orphan(sk);
531 sk->sk_shutdown = SHUTDOWN_MASK;
40ffe67d
AV
532 path = u->path;
533 u->path.dentry = NULL;
534 u->path.mnt = NULL;
1da177e4
LT
535 state = sk->sk_state;
536 sk->sk_state = TCP_CLOSE;
1c92b4e5 537 unix_state_unlock(sk);
1da177e4
LT
538
539 wake_up_interruptible_all(&u->peer_wait);
540
e27dfcea 541 skpair = unix_peer(sk);
1da177e4 542
e27dfcea 543 if (skpair != NULL) {
1da177e4 544 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 545 unix_state_lock(skpair);
1da177e4
LT
546 /* No more writes */
547 skpair->sk_shutdown = SHUTDOWN_MASK;
548 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
549 skpair->sk_err = ECONNRESET;
1c92b4e5 550 unix_state_unlock(skpair);
1da177e4 551 skpair->sk_state_change(skpair);
8d8ad9d7 552 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4 553 }
7d267278
RW
554
555 unix_dgram_peer_wake_disconnect(sk, skpair);
1da177e4
LT
556 sock_put(skpair); /* It may now die */
557 unix_peer(sk) = NULL;
558 }
559
560 /* Try to flush out this socket. Throw out buffers at least */
561
562 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 563 if (state == TCP_LISTEN)
1da177e4
LT
564 unix_release_sock(skb->sk, 1);
565 /* passed fds are erased in the kfree_skb hook */
73ed5d25 566 UNIXCB(skb).consumed = skb->len;
1da177e4
LT
567 kfree_skb(skb);
568 }
569
40ffe67d
AV
570 if (path.dentry)
571 path_put(&path);
1da177e4
LT
572
573 sock_put(sk);
574
575 /* ---- Socket is dead now and most probably destroyed ---- */
576
577 /*
e04dae84 578 * Fixme: BSD difference: In BSD all sockets connected to us get
1da177e4
LT
579 * ECONNRESET and we die on the spot. In Linux we behave
580 * like files and pipes do and wait for the last
581 * dereference.
582 *
583 * Can't we simply set sock->err?
584 *
585 * What the above comment does talk about? --ANK(980817)
586 */
587
9305cfa4 588 if (unix_tot_inflight)
ac7bfa62 589 unix_gc(); /* Garbage collect fds */
1da177e4
LT
590}
591
109f6e39
EB
592static void init_peercred(struct sock *sk)
593{
594 put_pid(sk->sk_peer_pid);
595 if (sk->sk_peer_cred)
596 put_cred(sk->sk_peer_cred);
597 sk->sk_peer_pid = get_pid(task_tgid(current));
598 sk->sk_peer_cred = get_current_cred();
599}
600
601static void copy_peercred(struct sock *sk, struct sock *peersk)
602{
603 put_pid(sk->sk_peer_pid);
604 if (sk->sk_peer_cred)
605 put_cred(sk->sk_peer_cred);
606 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
607 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
608}
609
1da177e4
LT
610static int unix_listen(struct socket *sock, int backlog)
611{
612 int err;
613 struct sock *sk = sock->sk;
614 struct unix_sock *u = unix_sk(sk);
109f6e39 615 struct pid *old_pid = NULL;
1da177e4
LT
616
617 err = -EOPNOTSUPP;
6eba6a37
ED
618 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
619 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
620 err = -EINVAL;
621 if (!u->addr)
6eba6a37 622 goto out; /* No listens on an unbound socket */
1c92b4e5 623 unix_state_lock(sk);
1da177e4
LT
624 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
625 goto out_unlock;
626 if (backlog > sk->sk_max_ack_backlog)
627 wake_up_interruptible_all(&u->peer_wait);
628 sk->sk_max_ack_backlog = backlog;
629 sk->sk_state = TCP_LISTEN;
630 /* set credentials so connect can copy them */
109f6e39 631 init_peercred(sk);
1da177e4
LT
632 err = 0;
633
634out_unlock:
1c92b4e5 635 unix_state_unlock(sk);
109f6e39 636 put_pid(old_pid);
1da177e4
LT
637out:
638 return err;
639}
640
641static int unix_release(struct socket *);
642static int unix_bind(struct socket *, struct sockaddr *, int);
643static int unix_stream_connect(struct socket *, struct sockaddr *,
644 int addr_len, int flags);
645static int unix_socketpair(struct socket *, struct socket *);
cdfbabfb 646static int unix_accept(struct socket *, struct socket *, int, bool);
9b2c45d4 647static int unix_getname(struct socket *, struct sockaddr *, int);
a11e1d43
LT
648static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
649static __poll_t unix_dgram_poll(struct file *, struct socket *,
650 poll_table *);
1da177e4
LT
651static int unix_ioctl(struct socket *, unsigned int, unsigned long);
652static int unix_shutdown(struct socket *, int);
1b784140
YX
653static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
654static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
869e7c62
HFS
655static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
656 size_t size, int flags);
2b514574
HFS
657static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
658 struct pipe_inode_info *, size_t size,
659 unsigned int flags);
1b784140
YX
660static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
661static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
1da177e4
LT
662static int unix_dgram_connect(struct socket *, struct sockaddr *,
663 int, int);
1b784140
YX
664static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
665static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
666 int);
1da177e4 667
12663bfc 668static int unix_set_peek_off(struct sock *sk, int val)
f55bb7f9
PE
669{
670 struct unix_sock *u = unix_sk(sk);
671
6e1ce3c3 672 if (mutex_lock_interruptible(&u->iolock))
12663bfc
SL
673 return -EINTR;
674
f55bb7f9 675 sk->sk_peek_off = val;
6e1ce3c3 676 mutex_unlock(&u->iolock);
12663bfc
SL
677
678 return 0;
f55bb7f9
PE
679}
680
681
90ddc4f0 682static const struct proto_ops unix_stream_ops = {
1da177e4
LT
683 .family = PF_UNIX,
684 .owner = THIS_MODULE,
685 .release = unix_release,
686 .bind = unix_bind,
687 .connect = unix_stream_connect,
688 .socketpair = unix_socketpair,
689 .accept = unix_accept,
690 .getname = unix_getname,
a11e1d43 691 .poll = unix_poll,
1da177e4
LT
692 .ioctl = unix_ioctl,
693 .listen = unix_listen,
694 .shutdown = unix_shutdown,
695 .setsockopt = sock_no_setsockopt,
696 .getsockopt = sock_no_getsockopt,
697 .sendmsg = unix_stream_sendmsg,
698 .recvmsg = unix_stream_recvmsg,
699 .mmap = sock_no_mmap,
869e7c62 700 .sendpage = unix_stream_sendpage,
2b514574 701 .splice_read = unix_stream_splice_read,
fc0d7536 702 .set_peek_off = unix_set_peek_off,
1da177e4
LT
703};
704
90ddc4f0 705static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
706 .family = PF_UNIX,
707 .owner = THIS_MODULE,
708 .release = unix_release,
709 .bind = unix_bind,
710 .connect = unix_dgram_connect,
711 .socketpair = unix_socketpair,
712 .accept = sock_no_accept,
713 .getname = unix_getname,
a11e1d43 714 .poll = unix_dgram_poll,
1da177e4
LT
715 .ioctl = unix_ioctl,
716 .listen = sock_no_listen,
717 .shutdown = unix_shutdown,
718 .setsockopt = sock_no_setsockopt,
719 .getsockopt = sock_no_getsockopt,
720 .sendmsg = unix_dgram_sendmsg,
721 .recvmsg = unix_dgram_recvmsg,
722 .mmap = sock_no_mmap,
723 .sendpage = sock_no_sendpage,
f55bb7f9 724 .set_peek_off = unix_set_peek_off,
1da177e4
LT
725};
726
90ddc4f0 727static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
728 .family = PF_UNIX,
729 .owner = THIS_MODULE,
730 .release = unix_release,
731 .bind = unix_bind,
732 .connect = unix_stream_connect,
733 .socketpair = unix_socketpair,
734 .accept = unix_accept,
735 .getname = unix_getname,
a11e1d43 736 .poll = unix_dgram_poll,
1da177e4
LT
737 .ioctl = unix_ioctl,
738 .listen = unix_listen,
739 .shutdown = unix_shutdown,
740 .setsockopt = sock_no_setsockopt,
741 .getsockopt = sock_no_getsockopt,
742 .sendmsg = unix_seqpacket_sendmsg,
a05d2ad1 743 .recvmsg = unix_seqpacket_recvmsg,
1da177e4
LT
744 .mmap = sock_no_mmap,
745 .sendpage = sock_no_sendpage,
f55bb7f9 746 .set_peek_off = unix_set_peek_off,
1da177e4
LT
747};
748
749static struct proto unix_proto = {
248969ae
ED
750 .name = "UNIX",
751 .owner = THIS_MODULE,
248969ae 752 .obj_size = sizeof(struct unix_sock),
1da177e4
LT
753};
754
11aa9c28 755static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
1da177e4
LT
756{
757 struct sock *sk = NULL;
758 struct unix_sock *u;
759
518de9b3
ED
760 atomic_long_inc(&unix_nr_socks);
761 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
1da177e4
LT
762 goto out;
763
11aa9c28 764 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
1da177e4
LT
765 if (!sk)
766 goto out;
767
6eba6a37 768 sock_init_data(sock, sk);
1da177e4 769
3aa9799e 770 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
1da177e4 771 sk->sk_write_space = unix_write_space;
a0a53c8b 772 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
773 sk->sk_destruct = unix_sock_destructor;
774 u = unix_sk(sk);
40ffe67d
AV
775 u->path.dentry = NULL;
776 u->path.mnt = NULL;
fd19f329 777 spin_lock_init(&u->lock);
516e0cc5 778 atomic_long_set(&u->inflight, 0);
1fd05ba5 779 INIT_LIST_HEAD(&u->link);
6e1ce3c3
LT
780 mutex_init(&u->iolock); /* single task reading lock */
781 mutex_init(&u->bindlock); /* single task binding lock */
1da177e4 782 init_waitqueue_head(&u->peer_wait);
7d267278 783 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
7123aaa3 784 unix_insert_socket(unix_sockets_unbound(sk), sk);
1da177e4 785out:
284b327b 786 if (sk == NULL)
518de9b3 787 atomic_long_dec(&unix_nr_socks);
920de804
ED
788 else {
789 local_bh_disable();
a8076d8d 790 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
920de804
ED
791 local_bh_enable();
792 }
1da177e4
LT
793 return sk;
794}
795
3f378b68
EP
796static int unix_create(struct net *net, struct socket *sock, int protocol,
797 int kern)
1da177e4
LT
798{
799 if (protocol && protocol != PF_UNIX)
800 return -EPROTONOSUPPORT;
801
802 sock->state = SS_UNCONNECTED;
803
804 switch (sock->type) {
805 case SOCK_STREAM:
806 sock->ops = &unix_stream_ops;
807 break;
808 /*
809 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
810 * nothing uses it.
811 */
812 case SOCK_RAW:
e27dfcea 813 sock->type = SOCK_DGRAM;
110af3ac 814 /* fall through */
1da177e4
LT
815 case SOCK_DGRAM:
816 sock->ops = &unix_dgram_ops;
817 break;
818 case SOCK_SEQPACKET:
819 sock->ops = &unix_seqpacket_ops;
820 break;
821 default:
822 return -ESOCKTNOSUPPORT;
823 }
824
11aa9c28 825 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
1da177e4
LT
826}
827
828static int unix_release(struct socket *sock)
829{
830 struct sock *sk = sock->sk;
831
832 if (!sk)
833 return 0;
834
ded34e0f 835 unix_release_sock(sk, 0);
1da177e4
LT
836 sock->sk = NULL;
837
ded34e0f 838 return 0;
1da177e4
LT
839}
840
841static int unix_autobind(struct socket *sock)
842{
843 struct sock *sk = sock->sk;
3b1e0a65 844 struct net *net = sock_net(sk);
1da177e4
LT
845 struct unix_sock *u = unix_sk(sk);
846 static u32 ordernum = 1;
6eba6a37 847 struct unix_address *addr;
1da177e4 848 int err;
8df73ff9 849 unsigned int retries = 0;
1da177e4 850
6e1ce3c3 851 err = mutex_lock_interruptible(&u->bindlock);
37ab4fa7
SL
852 if (err)
853 return err;
1da177e4
LT
854
855 err = 0;
856 if (u->addr)
857 goto out;
858
859 err = -ENOMEM;
0da974f4 860 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
1da177e4
LT
861 if (!addr)
862 goto out;
863
1da177e4 864 addr->name->sun_family = AF_UNIX;
8c9814b9 865 refcount_set(&addr->refcnt, 1);
1da177e4
LT
866
867retry:
868 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
07f0757a 869 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
1da177e4 870
fbe9cc4a 871 spin_lock(&unix_table_lock);
1da177e4
LT
872 ordernum = (ordernum+1)&0xFFFFF;
873
097e66c5 874 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
1da177e4 875 addr->hash)) {
fbe9cc4a 876 spin_unlock(&unix_table_lock);
8df73ff9
TH
877 /*
878 * __unix_find_socket_byname() may take long time if many names
879 * are already in use.
880 */
881 cond_resched();
882 /* Give up if all names seems to be in use. */
883 if (retries++ == 0xFFFFF) {
884 err = -ENOSPC;
885 kfree(addr);
886 goto out;
887 }
1da177e4
LT
888 goto retry;
889 }
890 addr->hash ^= sk->sk_type;
891
892 __unix_remove_socket(sk);
4fa1d0ff 893 smp_store_release(&u->addr, addr);
1da177e4 894 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
fbe9cc4a 895 spin_unlock(&unix_table_lock);
1da177e4
LT
896 err = 0;
897
6e1ce3c3 898out: mutex_unlock(&u->bindlock);
1da177e4
LT
899 return err;
900}
901
097e66c5
DL
/*
 * Resolve the address @sunname to a socket of type @type.
 *
 * A name with a non-zero first byte is looked up as a filesystem path:
 * the caller needs write permission on the inode, the inode must be a
 * socket, and a socket must be bound to it.  Any other name is looked up
 * in the hash table by its bytes.
 *
 * Returns the socket with a reference held, or NULL with *error set
 * (path lookup or permission error, -ECONNREFUSED, or -EPROTOTYPE when
 * the socket found has a different type).  *error is not written on
 * success.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;

		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;

		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;

		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type != type) {
			/* Wrong type: drop the path and the socket reference. */
			path_put(&path);
			sock_put(u);
			err = -EPROTOTYPE;
			goto fail;
		}

		touch_atime(&path);
		path_put(&path);
		return u;
	}

	u = unix_find_socket_byname(net, sunname, len, type, hash);
	if (!u) {
		err = -ECONNREFUSED;
		goto fail;
	}

	if (unix_sk(u)->path.dentry)
		touch_atime(&unix_sk(u)->path);

	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
956
38f7bd94 957static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
faf02010 958{
38f7bd94
LT
959 struct dentry *dentry;
960 struct path path;
961 int err = 0;
962 /*
963 * Get the parent directory, calculate the hash for last
964 * component.
965 */
966 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
967 err = PTR_ERR(dentry);
968 if (IS_ERR(dentry))
969 return err;
faf02010 970
38f7bd94
LT
971 /*
972 * All right, let's create it.
973 */
974 err = security_path_mknod(&path, dentry, mode, 0);
faf02010 975 if (!err) {
38f7bd94 976 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
faf02010 977 if (!err) {
38f7bd94 978 res->mnt = mntget(path.mnt);
faf02010
AV
979 res->dentry = dget(dentry);
980 }
981 }
38f7bd94 982 done_path_create(&path, dentry);
faf02010
AV
983 return err;
984}
1da177e4
LT
985
986static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
987{
988 struct sock *sk = sock->sk;
3b1e0a65 989 struct net *net = sock_net(sk);
1da177e4 990 struct unix_sock *u = unix_sk(sk);
e27dfcea 991 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
dae6ad8f 992 char *sun_path = sunaddr->sun_path;
38f7bd94 993 int err;
95c96174 994 unsigned int hash;
1da177e4
LT
995 struct unix_address *addr;
996 struct hlist_head *list;
82fe0d2b 997 struct path path = { };
1da177e4
LT
998
999 err = -EINVAL;
defbcf2d
MJ
1000 if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1001 sunaddr->sun_family != AF_UNIX)
1da177e4
LT
1002 goto out;
1003
e27dfcea 1004 if (addr_len == sizeof(short)) {
1da177e4
LT
1005 err = unix_autobind(sock);
1006 goto out;
1007 }
1008
1009 err = unix_mkname(sunaddr, addr_len, &hash);
1010 if (err < 0)
1011 goto out;
1012 addr_len = err;
1013
0fb44559
WC
1014 if (sun_path[0]) {
1015 umode_t mode = S_IFSOCK |
1016 (SOCK_INODE(sock)->i_mode & ~current_umask());
1017 err = unix_mknod(sun_path, mode, &path);
1018 if (err) {
1019 if (err == -EEXIST)
1020 err = -EADDRINUSE;
1021 goto out;
1022 }
1023 }
1024
6e1ce3c3 1025 err = mutex_lock_interruptible(&u->bindlock);
37ab4fa7 1026 if (err)
0fb44559 1027 goto out_put;
1da177e4
LT
1028
1029 err = -EINVAL;
1030 if (u->addr)
1031 goto out_up;
1032
1033 err = -ENOMEM;
1034 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1035 if (!addr)
1036 goto out_up;
1037
1038 memcpy(addr->name, sunaddr, addr_len);
1039 addr->len = addr_len;
1040 addr->hash = hash ^ sk->sk_type;
8c9814b9 1041 refcount_set(&addr->refcnt, 1);
1da177e4 1042
38f7bd94 1043 if (sun_path[0]) {
1da177e4 1044 addr->hash = UNIX_HASH_SIZE;
beef5121 1045 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
faf02010 1046 spin_lock(&unix_table_lock);
38f7bd94 1047 u->path = path;
faf02010
AV
1048 list = &unix_socket_table[hash];
1049 } else {
1050 spin_lock(&unix_table_lock);
1da177e4 1051 err = -EADDRINUSE;
097e66c5 1052 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1da177e4
LT
1053 sk->sk_type, hash)) {
1054 unix_release_addr(addr);
1055 goto out_unlock;
1056 }
1057
1058 list = &unix_socket_table[addr->hash];
1da177e4
LT
1059 }
1060
1061 err = 0;
1062 __unix_remove_socket(sk);
4fa1d0ff 1063 smp_store_release(&u->addr, addr);
1da177e4
LT
1064 __unix_insert_socket(list, sk);
1065
1066out_unlock:
fbe9cc4a 1067 spin_unlock(&unix_table_lock);
1da177e4 1068out_up:
6e1ce3c3 1069 mutex_unlock(&u->bindlock);
0fb44559
WC
1070out_put:
1071 if (err)
1072 path_put(&path);
1da177e4
LT
1073out:
1074 return err;
1da177e4
LT
1075}
1076
278a3de5
DM
/*
 * Take the state locks of two sockets.  When sk2 is NULL or is the same
 * socket as sk1, only sk1 is locked.  Otherwise the socket at the lower
 * address is locked first and the other one with the _nested variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1091
/*
 * Release what unix_state_double_lock() took: sk1 always, and sk2 too
 * when it is non-NULL and a different socket.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && !unlikely(sk1 == sk2))
		unix_state_unlock(sk2);
}
1101
1da177e4
LT
1102static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1103 int alen, int flags)
1104{
1105 struct sock *sk = sock->sk;
3b1e0a65 1106 struct net *net = sock_net(sk);
e27dfcea 1107 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1da177e4 1108 struct sock *other;
95c96174 1109 unsigned int hash;
1da177e4
LT
1110 int err;
1111
defbcf2d
MJ
1112 err = -EINVAL;
1113 if (alen < offsetofend(struct sockaddr, sa_family))
1114 goto out;
1115
1da177e4
LT
1116 if (addr->sa_family != AF_UNSPEC) {
1117 err = unix_mkname(sunaddr, alen, &hash);
1118 if (err < 0)
1119 goto out;
1120 alen = err;
1121
1122 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1123 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1124 goto out;
1125
278a3de5 1126restart:
e27dfcea 1127 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1da177e4
LT
1128 if (!other)
1129 goto out;
1130
278a3de5
DM
1131 unix_state_double_lock(sk, other);
1132
1133 /* Apparently VFS overslept socket death. Retry. */
1134 if (sock_flag(other, SOCK_DEAD)) {
1135 unix_state_double_unlock(sk, other);
1136 sock_put(other);
1137 goto restart;
1138 }
1da177e4
LT
1139
1140 err = -EPERM;
1141 if (!unix_may_send(sk, other))
1142 goto out_unlock;
1143
1144 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1145 if (err)
1146 goto out_unlock;
1147
1148 } else {
1149 /*
1150 * 1003.1g breaking connected state with AF_UNSPEC
1151 */
1152 other = NULL;
278a3de5 1153 unix_state_double_lock(sk, other);
1da177e4
LT
1154 }
1155
1156 /*
1157 * If it was connected, reconnect.
1158 */
1159 if (unix_peer(sk)) {
1160 struct sock *old_peer = unix_peer(sk);
e27dfcea 1161 unix_peer(sk) = other;
7d267278
RW
1162 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1163
278a3de5 1164 unix_state_double_unlock(sk, other);
1da177e4
LT
1165
1166 if (other != old_peer)
1167 unix_dgram_disconnected(sk, old_peer);
1168 sock_put(old_peer);
1169 } else {
e27dfcea 1170 unix_peer(sk) = other;
278a3de5 1171 unix_state_double_unlock(sk, other);
1da177e4 1172 }
ac7bfa62 1173 return 0;
1da177e4
LT
1174
1175out_unlock:
278a3de5 1176 unix_state_double_unlock(sk, other);
1da177e4
LT
1177 sock_put(other);
1178out:
1179 return err;
1180}
1181
1182static long unix_wait_for_peer(struct sock *other, long timeo)
1183{
1184 struct unix_sock *u = unix_sk(other);
1185 int sched;
1186 DEFINE_WAIT(wait);
1187
1188 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1189
1190 sched = !sock_flag(other, SOCK_DEAD) &&
1191 !(other->sk_shutdown & RCV_SHUTDOWN) &&
3c73419c 1192 unix_recvq_full(other);
1da177e4 1193
1c92b4e5 1194 unix_state_unlock(other);
1da177e4
LT
1195
1196 if (sched)
1197 timeo = schedule_timeout(timeo);
1198
1199 finish_wait(&u->peer_wait, &wait);
1200 return timeo;
1201}
1202
/*
 * unix_stream_connect - connect sk to the listening socket named by uaddr.
 *
 * The socket that will become the far end of the connection (newsk) and a
 * size-1 skb owned by it are allocated up front, before any state lock is
 * taken.  The listener is then looked up with unix_find_other() and, with
 * the state locks of both the listener and sk held, sk and newsk are made
 * each other's peer, newsk inherits the listener's address and path, and
 * the skb is queued on the listener's receive queue.
 *
 * If the listener's receive queue is full the call waits in
 * unix_wait_for_peer() for as long as sock_sndtimeo() allows and then
 * retries from the lookup; with a zero timeout it fails with -EAGAIN.
 *
 * Returns 0 on success, otherwise a negative errno: -ENOMEM,
 * -ECONNREFUSED (target not listening or shut down for receive),
 * -EAGAIN, -EISCONN, -EINVAL, or the error reported by unix_mkname(),
 * unix_autobind(), unix_find_other(), sock_intr_errno() or
 * security_unix_stream_connect().
 */
1203static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1204 int addr_len, int flags)
1205{
e27dfcea 1206 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1da177e4 1207 struct sock *sk = sock->sk;
3b1e0a65 1208 struct net *net = sock_net(sk);
1da177e4
LT
1209 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1210 struct sock *newsk = NULL;
1211 struct sock *other = NULL;
1212 struct sk_buff *skb = NULL;
95c96174 1213 unsigned int hash;
1da177e4
LT
1214 int st;
1215 int err;
1216 long timeo;
1217
1218 err = unix_mkname(sunaddr, addr_len, &hash);
1219 if (err < 0)
1220 goto out;
1221 addr_len = err;
1222
f64f9e71
JP
1223 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1224 (err = unix_autobind(sock)) != 0)
1da177e4
LT
1225 goto out;
1226
1227 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1228
1229 /* First of all allocate resources.
1230 If we will make it after state is locked,
1231 we will have to recheck all again in any case.
1232 */
1233
1234 err = -ENOMEM;
1235
1236 /* create new sock for complete connection */
11aa9c28 1237 newsk = unix_create1(sock_net(sk), NULL, 0);
1da177e4
LT
1238 if (newsk == NULL)
1239 goto out;
1240
1241 /* Allocate skb for sending to listening sock */
1242 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1243 if (skb == NULL)
1244 goto out;
1245
1246restart:
1247 /* Find listening sock. */
097e66c5 1248 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1da177e4
LT
1249 if (!other)
1250 goto out;
1251
1252 /* Latch state of peer */
1c92b4e5 1253 unix_state_lock(other);
1da177e4
LT
1254
1255 /* Apparently VFS overslept socket death. Retry. */
1256 if (sock_flag(other, SOCK_DEAD)) {
1c92b4e5 1257 unix_state_unlock(other);
1da177e4
LT
1258 sock_put(other);
1259 goto restart;
1260 }
1261
1262 err = -ECONNREFUSED;
1263 if (other->sk_state != TCP_LISTEN)
1264 goto out_unlock;
77238f2b
TS
1265 if (other->sk_shutdown & RCV_SHUTDOWN)
1266 goto out_unlock;
1da177e4 1267
3c73419c 1268 if (unix_recvq_full(other)) {
1da177e4
LT
1269 err = -EAGAIN;
1270 if (!timeo)
1271 goto out_unlock;
1272
/* unix_wait_for_peer() releases other's state lock itself. */
1273 timeo = unix_wait_for_peer(other, timeo);
1274
1275 err = sock_intr_errno(timeo);
/* "out" (not "out_unlock"): the lock is gone, only the reference on other is left to drop. */
1276 if (signal_pending(current))
1277 goto out;
1278 sock_put(other);
1279 goto restart;
ac7bfa62 1280 }
1da177e4
LT
1281
1282 /* Latch our state.
1283
e5537bfc 1284 It is tricky place. We need to grab our state lock and cannot
1da177e4
LT
1285 drop lock on peer. It is dangerous because deadlock is
1286 possible. Connect to self case and simultaneous
1287 attempt to connect are eliminated by checking socket
1288 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1289 check this before attempt to grab lock.
1290
1291 Well, and we have to recheck the state after socket locked.
1292 */
1293 st = sk->sk_state;
1294
1295 switch (st) {
1296 case TCP_CLOSE:
1297 /* This is ok... continue with connect */
1298 break;
1299 case TCP_ESTABLISHED:
1300 /* Socket is already connected */
1301 err = -EISCONN;
1302 goto out_unlock;
1303 default:
1304 err = -EINVAL;
1305 goto out_unlock;
1306 }
1307
1c92b4e5 1308 unix_state_lock_nested(sk);
1da177e4
LT
1309
/* sk_state was sampled without sk's lock; if it changed meanwhile, start over. */
1310 if (sk->sk_state != st) {
1c92b4e5
DM
1311 unix_state_unlock(sk);
1312 unix_state_unlock(other);
1da177e4
LT
1313 sock_put(other);
1314 goto restart;
1315 }
1316
3610cda5 1317 err = security_unix_stream_connect(sk, other, newsk);
1da177e4 1318 if (err) {
1c92b4e5 1319 unix_state_unlock(sk);
1da177e4
LT
1320 goto out_unlock;
1321 }
1322
1323 /* The way is open! Fastly set all the necessary fields... */
1324
/* Reference on sk taken for newsk's peer pointer, set on the next line. */
1325 sock_hold(sk);
1326 unix_peer(newsk) = sk;
1327 newsk->sk_state = TCP_ESTABLISHED;
1328 newsk->sk_type = sk->sk_type;
109f6e39 1329 init_peercred(newsk);
1da177e4 1330 newu = unix_sk(newsk);
eaefd110 1331 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1da177e4
LT
1332 otheru = unix_sk(other);
1333
4fa1d0ff
AV
1334 /* copy address information from listening to new sock
1335 *
1336 * The contents of *(otheru->addr) and otheru->path
1337 * are seen fully set up here, since we have found
1338 * otheru in hash under unix_table_lock. Insertion
1339 * into the hash chain we'd found it in had been done
1340 * in an earlier critical area protected by unix_table_lock,
1341 * the same one where we'd set *(otheru->addr) contents,
1342 * as well as otheru->path and otheru->addr itself.
1343 *
1344 * Using smp_store_release() here to set newu->addr
1345 * is enough to make those stores, as well as stores
1346 * to newu->path visible to anyone who gets newu->addr
1347 * by smp_load_acquire(). IOW, the same warranties
1348 * as for unix_sock instances bound in unix_bind() or
1349 * in unix_autobind().
1350 */
40ffe67d
AV
1351 if (otheru->path.dentry) {
1352 path_get(&otheru->path);
1353 newu->path = otheru->path;
1da177e4 1354 }
4fa1d0ff
AV
1355 refcount_inc(&otheru->addr->refcnt);
1356 smp_store_release(&newu->addr, otheru->addr);
1da177e4
LT
1357
1358 /* Set credentials */
109f6e39 1359 copy_peercred(sk, other);
1da177e4 1360
1da177e4
LT
1361 sock->state = SS_CONNECTED;
1362 sk->sk_state = TCP_ESTABLISHED;
830a1e5c
BL
1363 sock_hold(newsk);
1364
4e857c58 1365 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
830a1e5c 1366 unix_peer(sk) = newsk;
1da177e4 1367
1c92b4e5 1368 unix_state_unlock(sk);
1da177e4
LT
1369
1370 /* take ten and send info to listening sock */
1371 spin_lock(&other->sk_receive_queue.lock);
1372 __skb_queue_tail(&other->sk_receive_queue, skb);
1da177e4 1373 spin_unlock(&other->sk_receive_queue.lock);
1c92b4e5 1374 unix_state_unlock(other);
676d2369 1375 other->sk_data_ready(other);
1da177e4
LT
1376 sock_put(other);
1377 return 0;
1378
1379out_unlock:
1380 if (other)
1c92b4e5 1381 unix_state_unlock(other);
1da177e4
LT
1382
/* Common failure exit: free whatever of skb, newsk and the reference on other exists. */
1383out:
40d44446 1384 kfree_skb(skb);
1da177e4
LT
1385 if (newsk)
1386 unix_release_sock(newsk, 0);
1387 if (other)
1388 sock_put(other);
1389 return err;
1390}
1391
1392static int unix_socketpair(struct socket *socka, struct socket *sockb)
1393{
e27dfcea 1394 struct sock *ska = socka->sk, *skb = sockb->sk;
1da177e4
LT
1395
1396 /* Join our sockets back to back */
1397 sock_hold(ska);
1398 sock_hold(skb);
e27dfcea
JK
1399 unix_peer(ska) = skb;
1400 unix_peer(skb) = ska;
109f6e39
EB
1401 init_peercred(ska);
1402 init_peercred(skb);
1da177e4
LT
1403
1404 if (ska->sk_type != SOCK_DGRAM) {
1405 ska->sk_state = TCP_ESTABLISHED;
1406 skb->sk_state = TCP_ESTABLISHED;
1407 socka->state = SS_CONNECTED;
1408 sockb->state = SS_CONNECTED;
1409 }
1410 return 0;
1411}
1412
90c6bd34
DB
1413static void unix_sock_inherit_flags(const struct socket *old,
1414 struct socket *new)
1415{
1416 if (test_bit(SOCK_PASSCRED, &old->flags))
1417 set_bit(SOCK_PASSCRED, &new->flags);
1418 if (test_bit(SOCK_PASSSEC, &old->flags))
1419 set_bit(SOCK_PASSSEC, &new->flags);
1420}
1421
cdfbabfb
DH
1422static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1423 bool kern)
1da177e4
LT
1424{
1425 struct sock *sk = sock->sk;
1426 struct sock *tsk;
1427 struct sk_buff *skb;
1428 int err;
1429
1430 err = -EOPNOTSUPP;
6eba6a37 1431 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1da177e4
LT
1432 goto out;
1433
1434 err = -EINVAL;
1435 if (sk->sk_state != TCP_LISTEN)
1436 goto out;
1437
1438 /* If socket state is TCP_LISTEN it cannot change (for now...),
1439 * so that no locks are necessary.
1440 */
1441
1442 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1443 if (!skb) {
1444 /* This means receive shutdown. */
1445 if (err == 0)
1446 err = -EINVAL;
1447 goto out;
1448 }
1449
1450 tsk = skb->sk;
1451 skb_free_datagram(sk, skb);
1452 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1453
1454 /* attach accepted sock to socket */
1c92b4e5 1455 unix_state_lock(tsk);
1da177e4 1456 newsock->state = SS_CONNECTED;
90c6bd34 1457 unix_sock_inherit_flags(sock, newsock);
1da177e4 1458 sock_graft(tsk, newsock);
1c92b4e5 1459 unix_state_unlock(tsk);
1da177e4
LT
1460 return 0;
1461
1462out:
1463 return err;
1464}
1465
1466
9b2c45d4 1467static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1da177e4
LT
1468{
1469 struct sock *sk = sock->sk;
4fa1d0ff 1470 struct unix_address *addr;
13cfa97b 1471 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1da177e4
LT
1472 int err = 0;
1473
1474 if (peer) {
1475 sk = unix_peer_get(sk);
1476
1477 err = -ENOTCONN;
1478 if (!sk)
1479 goto out;
1480 err = 0;
1481 } else {
1482 sock_hold(sk);
1483 }
1484
4fa1d0ff
AV
1485 addr = smp_load_acquire(&unix_sk(sk)->addr);
1486 if (!addr) {
1da177e4
LT
1487 sunaddr->sun_family = AF_UNIX;
1488 sunaddr->sun_path[0] = 0;
9b2c45d4 1489 err = sizeof(short);
1da177e4 1490 } else {
9b2c45d4
DV
1491 err = addr->len;
1492 memcpy(sunaddr, addr->name, addr->len);
1da177e4 1493 }
1da177e4
LT
1494 sock_put(sk);
1495out:
1496 return err;
1497}
1498
1499static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1500{
1501 int i;
1502
1503 scm->fp = UNIXCB(skb).fp;
1da177e4
LT
1504 UNIXCB(skb).fp = NULL;
1505
6eba6a37 1506 for (i = scm->fp->count-1; i >= 0; i--)
415e3d3e 1507 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1da177e4
LT
1508}
1509
7361c36c 1510static void unix_destruct_scm(struct sk_buff *skb)
1da177e4
LT
1511{
1512 struct scm_cookie scm;
1513 memset(&scm, 0, sizeof(scm));
7361c36c 1514 scm.pid = UNIXCB(skb).pid;
7361c36c
EB
1515 if (UNIXCB(skb).fp)
1516 unix_detach_fds(&scm, skb);
1da177e4
LT
1517
1518 /* Alas, it calls VFS */
1519 /* So fscking what? fput() had been SMP-safe since the last Summer */
1520 scm_destroy(&scm);
1521 sock_wfree(skb);
1522}
1523
712f4aad 1524/*
1525 * The "user->unix_inflight" variable is protected by the garbage
1526 * collection lock, and we just read it locklessly here. If you go
1527 * over the limit, there might be a tiny race in actually noticing
1528 * it across threads. Tough.
1529 */
1530static inline bool too_many_unix_fds(struct task_struct *p)
1531{
1532 struct user_struct *user = current_user();
1533
1534 if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1535 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1536 return false;
1537}
1538
6209344f 1539static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1da177e4
LT
1540{
1541 int i;
25888e30 1542
712f4aad 1543 if (too_many_unix_fds(current))
1544 return -ETOOMANYREFS;
1545
6209344f
MS
1546 /*
1547 * Need to duplicate file references for the sake of garbage
1548 * collection. Otherwise a socket in the fps might become a
1549 * candidate for GC while the skb is not yet queued.
1550 */
1551 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1552 if (!UNIXCB(skb).fp)
1553 return -ENOMEM;
1554
712f4aad 1555 for (i = scm->fp->count - 1; i >= 0; i--)
415e3d3e 1556 unix_inflight(scm->fp->user, scm->fp->fp[i]);
27eac47b 1557 return 0;
1da177e4
LT
1558}
1559
f78a5fda 1560static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
7361c36c
EB
1561{
1562 int err = 0;
16e57262 1563
f78a5fda 1564 UNIXCB(skb).pid = get_pid(scm->pid);
6b0ee8c0
EB
1565 UNIXCB(skb).uid = scm->creds.uid;
1566 UNIXCB(skb).gid = scm->creds.gid;
7361c36c 1567 UNIXCB(skb).fp = NULL;
37a9a8df 1568 unix_get_secdata(scm, skb);
7361c36c
EB
1569 if (scm->fp && send_fds)
1570 err = unix_attach_fds(scm, skb);
1571
1572 skb->destructor = unix_destruct_scm;
1573 return err;
1574}
1575
9490f886
HFS
1576static bool unix_passcred_enabled(const struct socket *sock,
1577 const struct sock *other)
1578{
1579 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1580 !other->sk_socket ||
1581 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1582}
1583
16e57262
ED
1584/*
1585 * Some apps rely on write() giving SCM_CREDENTIALS
1586 * We include credentials if source or destination socket
1587 * asserted SOCK_PASSCRED.
1588 */
1589static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1590 const struct sock *other)
1591{
6b0ee8c0 1592 if (UNIXCB(skb).pid)
16e57262 1593 return;
9490f886 1594 if (unix_passcred_enabled(sock, other)) {
16e57262 1595 UNIXCB(skb).pid = get_pid(task_tgid(current));
6e0895c2 1596 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
16e57262
ED
1597 }
1598}
1599
9490f886
HFS
1600static int maybe_init_creds(struct scm_cookie *scm,
1601 struct socket *socket,
1602 const struct sock *other)
1603{
1604 int err;
1605 struct msghdr msg = { .msg_controllen = 0 };
1606
1607 err = scm_send(socket, &msg, scm, false);
1608 if (err)
1609 return err;
1610
1611 if (unix_passcred_enabled(socket, other)) {
1612 scm->pid = get_pid(task_tgid(current));
1613 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1614 }
1615 return err;
1616}
1617
1618static bool unix_skb_scm_eq(struct sk_buff *skb,
1619 struct scm_cookie *scm)
1620{
1621 const struct unix_skb_parms *u = &UNIXCB(skb);
1622
1623 return u->pid == scm->pid &&
1624 uid_eq(u->uid, scm->creds.uid) &&
1625 gid_eq(u->gid, scm->creds.gid) &&
1626 unix_secdata_eq(scm, skb);
1627}
1628
1da177e4
LT
1629/*
1630 * Send AF_UNIX data.
1631 */
1632
1b784140
YX
/*
 * unix_dgram_sendmsg - send the len bytes described by msg as one datagram.
 *
 * The destination is msg->msg_name when msg->msg_namelen is non-zero
 * (resolved with unix_find_other()), otherwise the connected peer of sk.
 * The payload is copied into a freshly allocated skb which is queued on
 * the receiver's sk_receive_queue.
 *
 * When the receiver's queue is full and the receiver is not connected back
 * to sk, the call sleeps in unix_wait_for_peer() if a send timeout applies;
 * with a zero timeout it ends in -EAGAIN (see the sk_locked handling below).
 *
 * Returns len on success, and also when sk_filter() rejects the skb (the
 * packet is dropped without reporting an error); otherwise a negative
 * errno such as -EOPNOTSUPP (MSG_OOB), -ENOTCONN, -EMSGSIZE, -EPERM,
 * -ECONNRESET, -ECONNREFUSED, -EPIPE or -EAGAIN.
 */
1633static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1634 size_t len)
1da177e4 1635{
1da177e4 1636 struct sock *sk = sock->sk;
3b1e0a65 1637 struct net *net = sock_net(sk);
1da177e4 1638 struct unix_sock *u = unix_sk(sk);
342dfc30 1639 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1da177e4
LT
1640 struct sock *other = NULL;
1641 int namelen = 0; /* fake GCC */
1642 int err;
95c96174 1643 unsigned int hash;
f78a5fda 1644 struct sk_buff *skb;
1da177e4 1645 long timeo;
7cc05662 1646 struct scm_cookie scm;
eb6a2481 1647 int data_len = 0;
/* sk_locked: non-zero while sk's own state lock is held in addition to other's. */
7d267278 1648 int sk_locked;
1da177e4 1649
5f23b734 1650 wait_for_unix_gc();
7cc05662 1651 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
1652 if (err < 0)
1653 return err;
1654
1655 err = -EOPNOTSUPP;
1656 if (msg->msg_flags&MSG_OOB)
1657 goto out;
1658
1659 if (msg->msg_namelen) {
1660 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1661 if (err < 0)
1662 goto out;
1663 namelen = err;
1664 } else {
1665 sunaddr = NULL;
1666 err = -ENOTCONN;
1667 other = unix_peer_get(sk);
1668 if (!other)
1669 goto out;
1670 }
1671
f64f9e71
JP
1672 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1673 && (err = unix_autobind(sock)) != 0)
1da177e4
LT
1674 goto out;
1675
1676 err = -EMSGSIZE;
1677 if (len > sk->sk_sndbuf - 32)
1678 goto out;
1679
/* Anything beyond SKB_MAX_ALLOC goes into page frags (data_len), rounded up to whole pages. */
31ff6aa5 1680 if (len > SKB_MAX_ALLOC) {
eb6a2481
ED
1681 data_len = min_t(size_t,
1682 len - SKB_MAX_ALLOC,
1683 MAX_SKB_FRAGS * PAGE_SIZE);
31ff6aa5
KT
1684 data_len = PAGE_ALIGN(data_len);
1685
1686 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1687 }
eb6a2481
ED
1688
1689 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
28d64271
ED
1690 msg->msg_flags & MSG_DONTWAIT, &err,
1691 PAGE_ALLOC_COSTLY_ORDER);
e27dfcea 1692 if (skb == NULL)
1da177e4
LT
1693 goto out;
1694
7cc05662 1695 err = unix_scm_to_skb(&scm, skb, true);
25888e30 1696 if (err < 0)
7361c36c 1697 goto out_free;
877ce7c1 1698
eb6a2481
ED
1699 skb_put(skb, len - data_len);
1700 skb->data_len = data_len;
1701 skb->len = len;
c0371da6 1702 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1da177e4
LT
1703 if (err)
1704 goto out_free;
1705
1706 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1707
/* restart: entered with no state lock held; other is NULL if the destination must be looked up (again). */
1708restart:
1709 if (!other) {
1710 err = -ECONNRESET;
1711 if (sunaddr == NULL)
1712 goto out_free;
1713
097e66c5 1714 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1da177e4 1715 hash, &err);
e27dfcea 1716 if (other == NULL)
1da177e4
LT
1717 goto out_free;
1718 }
1719
d6ae3bae
AC
1720 if (sk_filter(other, skb) < 0) {
1721 /* Toss the packet but do not return any error to the sender */
1722 err = len;
1723 goto out_free;
1724 }
1725
7d267278 1726 sk_locked = 0;
1c92b4e5 1727 unix_state_lock(other);
/* restart_locked: re-run the checks with other's state lock (and, if sk_locked, sk's) held. */
7d267278 1728restart_locked:
1da177e4
LT
1729 err = -EPERM;
1730 if (!unix_may_send(sk, other))
1731 goto out_unlock;
1732
7d267278 1733 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1da177e4
LT
1734 /*
1735 * Check with 1003.1g - what should
1736 * datagram error
1737 */
1c92b4e5 1738 unix_state_unlock(other);
1da177e4
LT
1739 sock_put(other);
1740
7d267278
RW
1741 if (!sk_locked)
1742 unix_state_lock(sk);
1743
1da177e4 1744 err = 0;
1da177e4 1745 if (unix_peer(sk) == other) {
e27dfcea 1746 unix_peer(sk) = NULL;
7d267278
RW
1747 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1748
1c92b4e5 1749 unix_state_unlock(sk);
1da177e4
LT
1750
1751 unix_dgram_disconnected(sk, other);
1752 sock_put(other);
1753 err = -ECONNREFUSED;
1754 } else {
1c92b4e5 1755 unix_state_unlock(sk);
1da177e4
LT
1756 }
1757
/* The dead socket was not our peer (err == 0): forget it and look the address up again. */
1758 other = NULL;
1759 if (err)
1760 goto out_free;
1761 goto restart;
1762 }
1763
1764 err = -EPIPE;
1765 if (other->sk_shutdown & RCV_SHUTDOWN)
1766 goto out_unlock;
1767
1768 if (sk->sk_type != SOCK_SEQPACKET) {
1769 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1770 if (err)
1771 goto out_unlock;
1772 }
1773
a5527dda
RW
1774 /* other == sk && unix_peer(other) != sk if
1775 * - unix_peer(sk) == NULL, destination address bound to sk
1776 * - unix_peer(sk) == sk by time of get but disconnected before lock
1777 */
1778 if (other != sk &&
1779 unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
7d267278
RW
1780 if (timeo) {
/* unix_wait_for_peer() releases other's state lock; the reference on other is kept for the retry. */
1781 timeo = unix_wait_for_peer(other, timeo);
1782
1783 err = sock_intr_errno(timeo);
1784 if (signal_pending(current))
1785 goto out_free;
1786
1787 goto restart;
1da177e4
LT
1788 }
1789
/* Only reached with timeo == 0: retake the locks so that sk's state lock is held as well. */
7d267278
RW
1790 if (!sk_locked) {
1791 unix_state_unlock(other);
1792 unix_state_double_lock(sk, other);
1793 }
1da177e4 1794
7d267278
RW
1795 if (unix_peer(sk) != other ||
1796 unix_dgram_peer_wake_me(sk, other)) {
1797 err = -EAGAIN;
1798 sk_locked = 1;
1799 goto out_unlock;
1800 }
1da177e4 1801
/* other's lock was dropped above, so every check has to be repeated with both locks held. */
7d267278
RW
1802 if (!sk_locked) {
1803 sk_locked = 1;
1804 goto restart_locked;
1805 }
1da177e4
LT
1806 }
1807
7d267278
RW
1808 if (unlikely(sk_locked))
1809 unix_state_unlock(sk);
1810
3f66116e
AC
1811 if (sock_flag(other, SOCK_RCVTSTAMP))
1812 __net_timestamp(skb);
16e57262 1813 maybe_add_creds(skb, sock, other);
1da177e4 1814 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 1815 unix_state_unlock(other);
676d2369 1816 other->sk_data_ready(other);
1da177e4 1817 sock_put(other);
7cc05662 1818 scm_destroy(&scm);
1da177e4
LT
1819 return len;
1820
1821out_unlock:
7d267278
RW
1822 if (sk_locked)
1823 unix_state_unlock(sk);
1c92b4e5 1824 unix_state_unlock(other);
1da177e4
LT
1825out_free:
1826 kfree_skb(skb);
1827out:
1828 if (other)
1829 sock_put(other);
7cc05662 1830 scm_destroy(&scm);
1da177e4
LT
1831 return err;
1832}
1833
e370a723 1834/* We use paged skbs for stream sockets, and limit occupancy to 32768
d4e9a408 1835 * bytes, and a minimum of a full page.
e370a723
ED
1836 */
/*
 * Byte size of an allocation of order get_order(32768), i.e. PAGE_SIZE
 * shifted left by that order.  NOTE(review): presumably get_order() rounds
 * up to a whole power-of-two number of pages, which is what yields the
 * "minimum of a full page" above - confirm against its definition.
 */
1837#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
ac7bfa62 1838
1b784140
YX
1839static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1840 size_t len)
1da177e4 1841{
1da177e4
LT
1842 struct sock *sk = sock->sk;
1843 struct sock *other = NULL;
6eba6a37 1844 int err, size;
f78a5fda 1845 struct sk_buff *skb;
e27dfcea 1846 int sent = 0;
7cc05662 1847 struct scm_cookie scm;
8ba69ba6 1848 bool fds_sent = false;
e370a723 1849 int data_len;
1da177e4 1850
5f23b734 1851 wait_for_unix_gc();
7cc05662 1852 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
1853 if (err < 0)
1854 return err;
1855
1856 err = -EOPNOTSUPP;
1857 if (msg->msg_flags&MSG_OOB)
1858 goto out_err;
1859
1860 if (msg->msg_namelen) {
1861 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1862 goto out_err;
1863 } else {
1da177e4 1864 err = -ENOTCONN;
830a1e5c 1865 other = unix_peer(sk);
1da177e4
LT
1866 if (!other)
1867 goto out_err;
1868 }
1869
1870 if (sk->sk_shutdown & SEND_SHUTDOWN)
1871 goto pipe_err;
1872
6eba6a37 1873 while (sent < len) {
e370a723 1874 size = len - sent;
1da177e4
LT
1875
1876 /* Keep two messages in the pipe so it schedules better */
e370a723 1877 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1da177e4 1878
e370a723
ED
1879 /* allow fallback to order-0 allocations */
1880 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
ac7bfa62 1881
e370a723 1882 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1da177e4 1883
31ff6aa5
KT
1884 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1885
e370a723 1886 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
28d64271
ED
1887 msg->msg_flags & MSG_DONTWAIT, &err,
1888 get_order(UNIX_SKB_FRAGS_SZ));
e370a723 1889 if (!skb)
1da177e4
LT
1890 goto out_err;
1891
f78a5fda 1892 /* Only send the fds in the first buffer */
7cc05662 1893 err = unix_scm_to_skb(&scm, skb, !fds_sent);
25888e30 1894 if (err < 0) {
7361c36c 1895 kfree_skb(skb);
f78a5fda 1896 goto out_err;
6209344f 1897 }
7361c36c 1898 fds_sent = true;
1da177e4 1899
e370a723
ED
1900 skb_put(skb, size - data_len);
1901 skb->data_len = data_len;
1902 skb->len = size;
c0371da6 1903 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
6eba6a37 1904 if (err) {
1da177e4 1905 kfree_skb(skb);
f78a5fda 1906 goto out_err;
1da177e4
LT
1907 }
1908
1c92b4e5 1909 unix_state_lock(other);
1da177e4
LT
1910
1911 if (sock_flag(other, SOCK_DEAD) ||
1912 (other->sk_shutdown & RCV_SHUTDOWN))
1913 goto pipe_err_free;
1914
16e57262 1915 maybe_add_creds(skb, sock, other);
1da177e4 1916 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 1917 unix_state_unlock(other);
676d2369 1918 other->sk_data_ready(other);
e27dfcea 1919 sent += size;
1da177e4 1920 }
1da177e4 1921
7cc05662 1922 scm_destroy(&scm);
1da177e4
LT
1923
1924 return sent;
1925
1926pipe_err_free:
1c92b4e5 1927 unix_state_unlock(other);
1da177e4
LT
1928 kfree_skb(skb);
1929pipe_err:
6eba6a37
ED
1930 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1931 send_sig(SIGPIPE, current, 0);
1da177e4
LT
1932 err = -EPIPE;
1933out_err:
7cc05662 1934 scm_destroy(&scm);
1da177e4
LT
1935 return sent ? : err;
1936}
1937
869e7c62
HFS
/*
 * unix_stream_sendpage - queue size bytes of page, starting at offset, on
 * the peer's receive queue without copying the data.
 *
 * If the skb at the tail of the peer's receive queue carries the same
 * credentials as this sender (unix_skb_scm_eq()), the page is appended to
 * that skb as a fragment.  If the queue is empty, the credentials differ,
 * or skb_append_pagefrags() fails, a new empty skb is allocated, the page
 * is attached to it and it is queued instead.
 *
 * Control flow: the "if (false)" block is never entered from the top; it
 * only hosts the alloc_skb label.  That label is jumped to with the peer's
 * state lock and iolock held; both are dropped there, newskb is allocated,
 * and execution falls through to retake the locks and repeat every check.
 * "tail" remembers which skb was at the queue tail when the allocation was
 * requested: if it still is, newskb is used without re-comparing
 * credentials.
 *
 * Returns size on success; otherwise -EOPNOTSUPP (MSG_OOB), -ENOTCONN,
 * -EPIPE (SIGPIPE is raised unless MSG_NOSIGNAL is set), -EAGAIN or
 * -ERESTARTSYS when taking iolock is interrupted, or the error from
 * sock_alloc_send_pskb(), maybe_init_creds() or unix_scm_to_skb().
 */
1938static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1939 int offset, size_t size, int flags)
1940{
9490f886
HFS
1941 int err;
1942 bool send_sigpipe = false;
/* init_scm stays true until scm has been initialised; only then must it be destroyed on exit. */
1943 bool init_scm = true;
1944 struct scm_cookie scm;
869e7c62
HFS
1945 struct sock *other, *sk = socket->sk;
1946 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1947
1948 if (flags & MSG_OOB)
1949 return -EOPNOTSUPP;
1950
1951 other = unix_peer(sk);
1952 if (!other || sk->sk_state != TCP_ESTABLISHED)
1953 return -ENOTCONN;
1954
1955 if (false) {
1956alloc_skb:
1957 unix_state_unlock(other);
6e1ce3c3 1958 mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
1959 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1960 &err, 0);
1961 if (!newskb)
9490f886 1962 goto err;
869e7c62
HFS
1963 }
1964
6e1ce3c3 1965 /* we must acquire iolock as we modify already present
869e7c62
HFS
1966 * skbs in the sk_receive_queue and mess with skb->len
1967 */
6e1ce3c3 1968 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
869e7c62
HFS
1969 if (err) {
1970 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
869e7c62
HFS
1971 goto err;
1972 }
1973
1974 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1975 err = -EPIPE;
9490f886 1976 send_sigpipe = true;
869e7c62
HFS
1977 goto err_unlock;
1978 }
1979
1980 unix_state_lock(other);
1981
1982 if (sock_flag(other, SOCK_DEAD) ||
1983 other->sk_shutdown & RCV_SHUTDOWN) {
1984 err = -EPIPE;
9490f886 1985 send_sigpipe = true;
869e7c62
HFS
1986 goto err_state_unlock;
1987 }
1988
9490f886
HFS
1989 if (init_scm) {
1990 err = maybe_init_creds(&scm, socket, other);
1991 if (err)
1992 goto err_state_unlock;
1993 init_scm = false;
1994 }
1995
/* Pick the skb to extend: the current queue tail if compatible, else newskb (allocating it first if needed). */
869e7c62
HFS
1996 skb = skb_peek_tail(&other->sk_receive_queue);
1997 if (tail && tail == skb) {
1998 skb = newskb;
9490f886
HFS
1999 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2000 if (newskb) {
869e7c62 2001 skb = newskb;
9490f886
HFS
2002 } else {
2003 tail = skb;
869e7c62 2004 goto alloc_skb;
9490f886 2005 }
869e7c62
HFS
2006 } else if (newskb) {
2007 /* this is fast path, we don't necessarily need to
2008 * call to kfree_skb even though with newskb == NULL
2009 * this - does no harm
2010 */
2011 consume_skb(newskb);
8844f972 2012 newskb = NULL;
869e7c62
HFS
2013 }
2014
/* A non-zero return means the page could not be added to skb: retry with a fresh one. */
2015 if (skb_append_pagefrags(skb, page, offset, size)) {
2016 tail = skb;
2017 goto alloc_skb;
2018 }
2019
/* Account the appended bytes on the skb and against the sender's sk_wmem_alloc. */
2020 skb->len += size;
2021 skb->data_len += size;
2022 skb->truesize += size;
14afee4b 2023 refcount_add(size, &sk->sk_wmem_alloc);
869e7c62 2024
a3a116e0 2025 if (newskb) {
9490f886
HFS
2026 err = unix_scm_to_skb(&scm, skb, false);
2027 if (err)
2028 goto err_state_unlock;
a3a116e0 2029 spin_lock(&other->sk_receive_queue.lock);
869e7c62 2030 __skb_queue_tail(&other->sk_receive_queue, newskb);
a3a116e0
HFS
2031 spin_unlock(&other->sk_receive_queue.lock);
2032 }
869e7c62
HFS
2033
2034 unix_state_unlock(other);
6e1ce3c3 2035 mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
2036
2037 other->sk_data_ready(other);
9490f886 2038 scm_destroy(&scm);
869e7c62
HFS
2039 return size;
2040
2041err_state_unlock:
2042 unix_state_unlock(other);
2043err_unlock:
6e1ce3c3 2044 mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
2045err:
2046 kfree_skb(newskb);
2047 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2048 send_sig(SIGPIPE, current, 0);
9490f886
HFS
2049 if (!init_scm)
2050 scm_destroy(&scm);
869e7c62
HFS
2051 return err;
2052}
2053
1b784140
YX
2054static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2055 size_t len)
1da177e4
LT
2056{
2057 int err;
2058 struct sock *sk = sock->sk;
ac7bfa62 2059
1da177e4
LT
2060 err = sock_error(sk);
2061 if (err)
2062 return err;
2063
2064 if (sk->sk_state != TCP_ESTABLISHED)
2065 return -ENOTCONN;
2066
2067 if (msg->msg_namelen)
2068 msg->msg_namelen = 0;
2069
1b784140 2070 return unix_dgram_sendmsg(sock, msg, len);
1da177e4 2071}
ac7bfa62 2072
1b784140
YX
2073static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2074 size_t size, int flags)
a05d2ad1
EB
2075{
2076 struct sock *sk = sock->sk;
2077
2078 if (sk->sk_state != TCP_ESTABLISHED)
2079 return -ENOTCONN;
2080
1b784140 2081 return unix_dgram_recvmsg(sock, msg, size, flags);
a05d2ad1
EB
2082}
2083
1da177e4
LT
2084static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2085{
4fa1d0ff 2086 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
1da177e4 2087
4fa1d0ff
AV
2088 if (addr) {
2089 msg->msg_namelen = addr->len;
2090 memcpy(msg->msg_name, addr->name, addr->len);
1da177e4
LT
2091 }
2092}
2093
1b784140
YX
2094static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2095 size_t size, int flags)
1da177e4 2096{
7cc05662 2097 struct scm_cookie scm;
1da177e4
LT
2098 struct sock *sk = sock->sk;
2099 struct unix_sock *u = unix_sk(sk);
64874280
RW
2100 struct sk_buff *skb, *last;
2101 long timeo;
1da177e4 2102 int err;
f55bb7f9 2103 int peeked, skip;
1da177e4
LT
2104
2105 err = -EOPNOTSUPP;
2106 if (flags&MSG_OOB)
2107 goto out;
2108
64874280 2109 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1da177e4 2110
64874280 2111 do {
6e1ce3c3 2112 mutex_lock(&u->iolock);
f55bb7f9 2113
64874280 2114 skip = sk_peek_offset(sk, flags);
7c13f97f
PA
2115 skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2116 &err, &last);
64874280
RW
2117 if (skb)
2118 break;
2119
6e1ce3c3 2120 mutex_unlock(&u->iolock);
64874280
RW
2121
2122 if (err != -EAGAIN)
2123 break;
2124 } while (timeo &&
2125 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2126
6e1ce3c3 2127 if (!skb) { /* implies iolock unlocked */
0a112258
FZ
2128 unix_state_lock(sk);
2129 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2130 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2131 (sk->sk_shutdown & RCV_SHUTDOWN))
2132 err = 0;
2133 unix_state_unlock(sk);
64874280 2134 goto out;
0a112258 2135 }
1da177e4 2136
77b75f4d
RW
2137 if (wq_has_sleeper(&u->peer_wait))
2138 wake_up_interruptible_sync_poll(&u->peer_wait,
a9a08845
LT
2139 EPOLLOUT | EPOLLWRNORM |
2140 EPOLLWRBAND);
1da177e4
LT
2141
2142 if (msg->msg_name)
2143 unix_copy_addr(msg, skb->sk);
2144
f55bb7f9
PE
2145 if (size > skb->len - skip)
2146 size = skb->len - skip;
2147 else if (size < skb->len - skip)
1da177e4
LT
2148 msg->msg_flags |= MSG_TRUNC;
2149
51f3d02b 2150 err = skb_copy_datagram_msg(skb, skip, msg, size);
1da177e4
LT
2151 if (err)
2152 goto out_free;
2153
3f66116e
AC
2154 if (sock_flag(sk, SOCK_RCVTSTAMP))
2155 __sock_recv_timestamp(msg, sk, skb);
2156
7cc05662
CH
2157 memset(&scm, 0, sizeof(scm));
2158
2159 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2160 unix_set_secdata(&scm, skb);
1da177e4 2161
6eba6a37 2162 if (!(flags & MSG_PEEK)) {
1da177e4 2163 if (UNIXCB(skb).fp)
7cc05662 2164 unix_detach_fds(&scm, skb);
f55bb7f9
PE
2165
2166 sk_peek_offset_bwd(sk, skb->len);
6eba6a37 2167 } else {
1da177e4
LT
2168 /* It is questionable: on PEEK we could:
2169 - do not return fds - good, but too simple 8)
2170 - return fds, and do not return them on read (old strategy,
2171 apparently wrong)
2172 - clone fds (I chose it for now, it is the most universal
2173 solution)
ac7bfa62
YH
2174
2175 POSIX 1003.1g does not actually define this clearly
2176 at all. POSIX 1003.1g doesn't define a lot of things
2177 clearly however!
2178
1da177e4 2179 */
f55bb7f9
PE
2180
2181 sk_peek_offset_fwd(sk, size);
2182
1da177e4 2183 if (UNIXCB(skb).fp)
7cc05662 2184 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
1da177e4 2185 }
9f6f9af7 2186 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1da177e4 2187
7cc05662 2188 scm_recv(sock, msg, &scm, flags);
1da177e4
LT
2189
2190out_free:
6eba6a37 2191 skb_free_datagram(sk, skb);
6e1ce3c3 2192 mutex_unlock(&u->iolock);
1da177e4
LT
2193out:
2194 return err;
2195}
2196
2197/*
79f632c7 2198 * Sleep until more data has arrived. But check for races..
1da177e4 2199 */
79f632c7 2200static long unix_stream_data_wait(struct sock *sk, long timeo,
06a77b07
WC
2201 struct sk_buff *last, unsigned int last_len,
2202 bool freezable)
1da177e4 2203{
2b514574 2204 struct sk_buff *tail;
1da177e4
LT
2205 DEFINE_WAIT(wait);
2206
1c92b4e5 2207 unix_state_lock(sk);
1da177e4
LT
2208
2209 for (;;) {
aa395145 2210 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4 2211
2b514574
HFS
2212 tail = skb_peek_tail(&sk->sk_receive_queue);
2213 if (tail != last ||
2214 (tail && tail->len != last_len) ||
1da177e4
LT
2215 sk->sk_err ||
2216 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2217 signal_pending(current) ||
2218 !timeo)
2219 break;
2220
9cd3e072 2221 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1c92b4e5 2222 unix_state_unlock(sk);
06a77b07
WC
2223 if (freezable)
2224 timeo = freezable_schedule_timeout(timeo);
2225 else
2226 timeo = schedule_timeout(timeo);
1c92b4e5 2227 unix_state_lock(sk);
b48732e4
MS
2228
2229 if (sock_flag(sk, SOCK_DEAD))
2230 break;
2231
9cd3e072 2232 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1da177e4
LT
2233 }
2234
aa395145 2235 finish_wait(sk_sleep(sk), &wait);
1c92b4e5 2236 unix_state_unlock(sk);
1da177e4
LT
2237 return timeo;
2238}
2239
e370a723
ED
2240static unsigned int unix_skb_len(const struct sk_buff *skb)
2241{
2242 return skb->len - UNIXCB(skb).consumed;
2243}
2244
2b514574
HFS
2245struct unix_stream_read_state {
2246 int (*recv_actor)(struct sk_buff *, int, int,
2247 struct unix_stream_read_state *);
2248 struct socket *socket;
2249 struct msghdr *msg;
2250 struct pipe_inode_info *pipe;
2251 size_t size;
2252 int flags;
2253 unsigned int splice_flags;
2254};
2255
06a77b07
WC
2256static int unix_stream_read_generic(struct unix_stream_read_state *state,
2257 bool freezable)
1da177e4 2258{
7cc05662 2259 struct scm_cookie scm;
2b514574 2260 struct socket *sock = state->socket;
1da177e4
LT
2261 struct sock *sk = sock->sk;
2262 struct unix_sock *u = unix_sk(sk);
1da177e4 2263 int copied = 0;
2b514574 2264 int flags = state->flags;
de144391 2265 int noblock = flags & MSG_DONTWAIT;
2b514574 2266 bool check_creds = false;
1da177e4
LT
2267 int target;
2268 int err = 0;
2269 long timeo;
fc0d7536 2270 int skip;
2b514574
HFS
2271 size_t size = state->size;
2272 unsigned int last_len;
1da177e4 2273
1b92ee3d
RW
2274 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2275 err = -EINVAL;
1da177e4 2276 goto out;
1b92ee3d 2277 }
1da177e4 2278
1b92ee3d
RW
2279 if (unlikely(flags & MSG_OOB)) {
2280 err = -EOPNOTSUPP;
1da177e4 2281 goto out;
1b92ee3d 2282 }
1da177e4 2283
2b514574 2284 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
de144391 2285 timeo = sock_rcvtimeo(sk, noblock);
1da177e4 2286
2b514574
HFS
2287 memset(&scm, 0, sizeof(scm));
2288
1da177e4
LT
2289 /* Lock the socket to prevent queue disordering
2290 * while sleeps in memcpy_tomsg
2291 */
6e1ce3c3 2292 mutex_lock(&u->iolock);
1da177e4 2293
a0917e0b 2294 skip = max(sk_peek_offset(sk, flags), 0);
e9193d60 2295
6eba6a37 2296 do {
1da177e4 2297 int chunk;
73ed5d25 2298 bool drop_skb;
79f632c7 2299 struct sk_buff *skb, *last;
1da177e4 2300
18eceb81 2301redo:
3c0d2f37 2302 unix_state_lock(sk);
b48732e4
MS
2303 if (sock_flag(sk, SOCK_DEAD)) {
2304 err = -ECONNRESET;
2305 goto unlock;
2306 }
79f632c7 2307 last = skb = skb_peek(&sk->sk_receive_queue);
2b514574 2308 last_len = last ? last->len : 0;
fc0d7536 2309again:
6eba6a37 2310 if (skb == NULL) {
1da177e4 2311 if (copied >= target)
3c0d2f37 2312 goto unlock;
1da177e4
LT
2313
2314 /*
2315 * POSIX 1003.1g mandates this order.
2316 */
ac7bfa62 2317
6eba6a37
ED
2318 err = sock_error(sk);
2319 if (err)
3c0d2f37 2320 goto unlock;
1da177e4 2321 if (sk->sk_shutdown & RCV_SHUTDOWN)
3c0d2f37
MS
2322 goto unlock;
2323
2324 unix_state_unlock(sk);
1b92ee3d
RW
2325 if (!timeo) {
2326 err = -EAGAIN;
1da177e4 2327 break;
1b92ee3d
RW
2328 }
2329
6e1ce3c3 2330 mutex_unlock(&u->iolock);
1da177e4 2331
2b514574 2332 timeo = unix_stream_data_wait(sk, timeo, last,
06a77b07 2333 last_len, freezable);
1da177e4 2334
3822b5c2 2335 if (signal_pending(current)) {
1da177e4 2336 err = sock_intr_errno(timeo);
fa0dc04d 2337 scm_destroy(&scm);
1da177e4
LT
2338 goto out;
2339 }
b3ca9b02 2340
6e1ce3c3 2341 mutex_lock(&u->iolock);
18eceb81 2342 goto redo;
2b514574 2343unlock:
3c0d2f37
MS
2344 unix_state_unlock(sk);
2345 break;
1da177e4 2346 }
fc0d7536 2347
e370a723
ED
2348 while (skip >= unix_skb_len(skb)) {
2349 skip -= unix_skb_len(skb);
79f632c7 2350 last = skb;
2b514574 2351 last_len = skb->len;
fc0d7536 2352 skb = skb_peek_next(skb, &sk->sk_receive_queue);
79f632c7
BP
2353 if (!skb)
2354 goto again;
fc0d7536
PE
2355 }
2356
3c0d2f37 2357 unix_state_unlock(sk);
1da177e4
LT
2358
2359 if (check_creds) {
2360 /* Never glue messages from different writers */
9490f886 2361 if (!unix_skb_scm_eq(skb, &scm))
1da177e4 2362 break;
0e82e7f6 2363 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
1da177e4 2364 /* Copy credentials */
7cc05662 2365 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
37a9a8df 2366 unix_set_secdata(&scm, skb);
2b514574 2367 check_creds = true;
1da177e4
LT
2368 }
2369
2370 /* Copy address just once */
2b514574
HFS
2371 if (state->msg && state->msg->msg_name) {
2372 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2373 state->msg->msg_name);
2374 unix_copy_addr(state->msg, skb->sk);
1da177e4
LT
2375 sunaddr = NULL;
2376 }
2377
e370a723 2378 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
73ed5d25 2379 skb_get(skb);
2b514574 2380 chunk = state->recv_actor(skb, skip, chunk, state);
73ed5d25
HFS
2381 drop_skb = !unix_skb_len(skb);
2382 /* skb is only safe to use if !drop_skb */
2383 consume_skb(skb);
2b514574 2384 if (chunk < 0) {
1da177e4
LT
2385 if (copied == 0)
2386 copied = -EFAULT;
2387 break;
2388 }
2389 copied += chunk;
2390 size -= chunk;
2391
73ed5d25
HFS
2392 if (drop_skb) {
2393 /* the skb was touched by a concurrent reader;
2394 * we should not expect anything from this skb
2395 * anymore and assume it invalid - we can be
2396 * sure it was dropped from the socket queue
2397 *
2398 * let's report a short read
2399 */
2400 err = 0;
2401 break;
2402 }
2403
1da177e4 2404 /* Mark read part of skb as used */
6eba6a37 2405 if (!(flags & MSG_PEEK)) {
e370a723 2406 UNIXCB(skb).consumed += chunk;
1da177e4 2407
fc0d7536
PE
2408 sk_peek_offset_bwd(sk, chunk);
2409
1da177e4 2410 if (UNIXCB(skb).fp)
7cc05662 2411 unix_detach_fds(&scm, skb);
1da177e4 2412
e370a723 2413 if (unix_skb_len(skb))
1da177e4 2414 break;
1da177e4 2415
6f01fd6e 2416 skb_unlink(skb, &sk->sk_receive_queue);
70d4bf6d 2417 consume_skb(skb);
1da177e4 2418
7cc05662 2419 if (scm.fp)
1da177e4 2420 break;
6eba6a37 2421 } else {
1da177e4
LT
2422 /* It is questionable, see note in unix_dgram_recvmsg.
2423 */
2424 if (UNIXCB(skb).fp)
7cc05662 2425 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
1da177e4 2426
e9193d60 2427 sk_peek_offset_fwd(sk, chunk);
fc0d7536 2428
9f389e35
AC
2429 if (UNIXCB(skb).fp)
2430 break;
2431
e9193d60 2432 skip = 0;
9f389e35
AC
2433 last = skb;
2434 last_len = skb->len;
2435 unix_state_lock(sk);
2436 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2437 if (skb)
2438 goto again;
2439 unix_state_unlock(sk);
1da177e4
LT
2440 break;
2441 }
2442 } while (size);
2443
6e1ce3c3 2444 mutex_unlock(&u->iolock);
2b514574
HFS
2445 if (state->msg)
2446 scm_recv(sock, state->msg, &scm, flags);
2447 else
2448 scm_destroy(&scm);
1da177e4
LT
2449out:
2450 return copied ? : err;
2451}
2452
2b514574
HFS
2453static int unix_stream_read_actor(struct sk_buff *skb,
2454 int skip, int chunk,
2455 struct unix_stream_read_state *state)
2456{
2457 int ret;
2458
2459 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2460 state->msg, chunk);
2461 return ret ?: chunk;
2462}
2463
2464static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2465 size_t size, int flags)
2466{
2467 struct unix_stream_read_state state = {
2468 .recv_actor = unix_stream_read_actor,
2469 .socket = sock,
2470 .msg = msg,
2471 .size = size,
2472 .flags = flags
2473 };
2474
06a77b07 2475 return unix_stream_read_generic(&state, true);
2b514574
HFS
2476}
2477
2b514574
HFS
2478static int unix_stream_splice_actor(struct sk_buff *skb,
2479 int skip, int chunk,
2480 struct unix_stream_read_state *state)
2481{
2482 return skb_splice_bits(skb, state->socket->sk,
2483 UNIXCB(skb).consumed + skip,
25869262 2484 state->pipe, chunk, state->splice_flags);
2b514574
HFS
2485}
2486
2487static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2488 struct pipe_inode_info *pipe,
2489 size_t size, unsigned int flags)
2490{
2491 struct unix_stream_read_state state = {
2492 .recv_actor = unix_stream_splice_actor,
2493 .socket = sock,
2494 .pipe = pipe,
2495 .size = size,
2496 .splice_flags = flags,
2497 };
2498
2499 if (unlikely(*ppos))
2500 return -ESPIPE;
2501
2502 if (sock->file->f_flags & O_NONBLOCK ||
2503 flags & SPLICE_F_NONBLOCK)
2504 state.flags = MSG_DONTWAIT;
2505
06a77b07 2506 return unix_stream_read_generic(&state, false);
2b514574
HFS
2507}
2508
1da177e4
LT
2509static int unix_shutdown(struct socket *sock, int mode)
2510{
2511 struct sock *sk = sock->sk;
2512 struct sock *other;
2513
fc61b928
XW
2514 if (mode < SHUT_RD || mode > SHUT_RDWR)
2515 return -EINVAL;
2516 /* This maps:
2517 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2518 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2519 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2520 */
2521 ++mode;
7180a031
AC
2522
2523 unix_state_lock(sk);
2524 sk->sk_shutdown |= mode;
2525 other = unix_peer(sk);
2526 if (other)
2527 sock_hold(other);
2528 unix_state_unlock(sk);
2529 sk->sk_state_change(sk);
2530
2531 if (other &&
2532 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2533
2534 int peer_mode = 0;
2535
2536 if (mode&RCV_SHUTDOWN)
2537 peer_mode |= SEND_SHUTDOWN;
2538 if (mode&SEND_SHUTDOWN)
2539 peer_mode |= RCV_SHUTDOWN;
2540 unix_state_lock(other);
2541 other->sk_shutdown |= peer_mode;
2542 unix_state_unlock(other);
2543 other->sk_state_change(other);
2544 if (peer_mode == SHUTDOWN_MASK)
2545 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2546 else if (peer_mode & RCV_SHUTDOWN)
2547 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4 2548 }
7180a031
AC
2549 if (other)
2550 sock_put(other);
2551
1da177e4
LT
2552 return 0;
2553}
2554
885ee74d
PE
2555long unix_inq_len(struct sock *sk)
2556{
2557 struct sk_buff *skb;
2558 long amount = 0;
2559
2560 if (sk->sk_state == TCP_LISTEN)
2561 return -EINVAL;
2562
2563 spin_lock(&sk->sk_receive_queue.lock);
2564 if (sk->sk_type == SOCK_STREAM ||
2565 sk->sk_type == SOCK_SEQPACKET) {
2566 skb_queue_walk(&sk->sk_receive_queue, skb)
e370a723 2567 amount += unix_skb_len(skb);
885ee74d
PE
2568 } else {
2569 skb = skb_peek(&sk->sk_receive_queue);
2570 if (skb)
2571 amount = skb->len;
2572 }
2573 spin_unlock(&sk->sk_receive_queue.lock);
2574
2575 return amount;
2576}
2577EXPORT_SYMBOL_GPL(unix_inq_len);
2578
/* Amount of send-buffer memory currently charged to @sk. */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2584
ba94f308
AV
2585static int unix_open_file(struct sock *sk)
2586{
2587 struct path path;
2588 struct file *f;
2589 int fd;
2590
2591 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2592 return -EPERM;
2593
4fa1d0ff
AV
2594 if (!smp_load_acquire(&unix_sk(sk)->addr))
2595 return -ENOENT;
2596
ba94f308 2597 path = unix_sk(sk)->path;
4fa1d0ff 2598 if (!path.dentry)
ba94f308 2599 return -ENOENT;
ba94f308
AV
2600
2601 path_get(&path);
ba94f308
AV
2602
2603 fd = get_unused_fd_flags(O_CLOEXEC);
2604 if (fd < 0)
2605 goto out;
2606
2607 f = dentry_open(&path, O_PATH, current_cred());
2608 if (IS_ERR(f)) {
2609 put_unused_fd(fd);
2610 fd = PTR_ERR(f);
2611 goto out;
2612 }
2613
2614 fd_install(fd, f);
2615out:
2616 path_put(&path);
2617
2618 return fd;
2619}
2620
1da177e4
LT
2621static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2622{
2623 struct sock *sk = sock->sk;
e27dfcea 2624 long amount = 0;
1da177e4
LT
2625 int err;
2626
6eba6a37
ED
2627 switch (cmd) {
2628 case SIOCOUTQ:
885ee74d 2629 amount = unix_outq_len(sk);
6eba6a37
ED
2630 err = put_user(amount, (int __user *)arg);
2631 break;
2632 case SIOCINQ:
885ee74d
PE
2633 amount = unix_inq_len(sk);
2634 if (amount < 0)
2635 err = amount;
2636 else
1da177e4 2637 err = put_user(amount, (int __user *)arg);
885ee74d 2638 break;
ba94f308
AV
2639 case SIOCUNIXFILE:
2640 err = unix_open_file(sk);
2641 break;
6eba6a37
ED
2642 default:
2643 err = -ENOIOCTLCMD;
2644 break;
1da177e4
LT
2645 }
2646 return err;
2647}
2648
a11e1d43 2649static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
2650{
2651 struct sock *sk = sock->sk;
a11e1d43
LT
2652 __poll_t mask;
2653
89ab066d 2654 sock_poll_wait(file, sock, wait);
a11e1d43 2655 mask = 0;
1da177e4
LT
2656
2657 /* exceptional events? */
2658 if (sk->sk_err)
a9a08845 2659 mask |= EPOLLERR;
1da177e4 2660 if (sk->sk_shutdown == SHUTDOWN_MASK)
a9a08845 2661 mask |= EPOLLHUP;
f348d70a 2662 if (sk->sk_shutdown & RCV_SHUTDOWN)
a9a08845 2663 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
1da177e4
LT
2664
2665 /* readable? */
db40980f 2666 if (!skb_queue_empty(&sk->sk_receive_queue))
a9a08845 2667 mask |= EPOLLIN | EPOLLRDNORM;
1da177e4
LT
2668
2669 /* Connection-based need to check for termination and startup */
6eba6a37
ED
2670 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2671 sk->sk_state == TCP_CLOSE)
a9a08845 2672 mask |= EPOLLHUP;
1da177e4
LT
2673
2674 /*
2675 * we set writable also when the other side has shut down the
2676 * connection. This prevents stuck sockets.
2677 */
2678 if (unix_writable(sk))
a9a08845 2679 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
1da177e4
LT
2680
2681 return mask;
2682}
2683
a11e1d43
LT
2684static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2685 poll_table *wait)
3c73419c 2686{
ec0d215f 2687 struct sock *sk = sock->sk, *other;
a11e1d43
LT
2688 unsigned int writable;
2689 __poll_t mask;
2690
89ab066d 2691 sock_poll_wait(file, sock, wait);
a11e1d43 2692 mask = 0;
3c73419c
RW
2693
2694 /* exceptional events? */
2695 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
a9a08845
LT
2696 mask |= EPOLLERR |
2697 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
7d4c04fc 2698
3c73419c 2699 if (sk->sk_shutdown & RCV_SHUTDOWN)
a9a08845 2700 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3c73419c 2701 if (sk->sk_shutdown == SHUTDOWN_MASK)
a9a08845 2702 mask |= EPOLLHUP;
3c73419c
RW
2703
2704 /* readable? */
5456f09a 2705 if (!skb_queue_empty(&sk->sk_receive_queue))
a9a08845 2706 mask |= EPOLLIN | EPOLLRDNORM;
3c73419c
RW
2707
2708 /* Connection-based need to check for termination and startup */
2709 if (sk->sk_type == SOCK_SEQPACKET) {
2710 if (sk->sk_state == TCP_CLOSE)
a9a08845 2711 mask |= EPOLLHUP;
3c73419c
RW
2712 /* connection hasn't started yet? */
2713 if (sk->sk_state == TCP_SYN_SENT)
2714 return mask;
2715 }
2716
973a34aa 2717 /* No write status requested, avoid expensive OUT tests. */
a11e1d43 2718 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
973a34aa
ED
2719 return mask;
2720
ec0d215f 2721 writable = unix_writable(sk);
7d267278
RW
2722 if (writable) {
2723 unix_state_lock(sk);
2724
2725 other = unix_peer(sk);
2726 if (other && unix_peer(other) != sk &&
2727 unix_recvq_full(other) &&
2728 unix_dgram_peer_wake_me(sk, other))
2729 writable = 0;
2730
2731 unix_state_unlock(sk);
ec0d215f
RW
2732 }
2733
2734 if (writable)
a9a08845 2735 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3c73419c 2736 else
9cd3e072 2737 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3c73419c 2738
3c73419c
RW
2739 return mask;
2740}
1da177e4
LT
2741
2742#ifdef CONFIG_PROC_FS
a53eb3fe 2743
7123aaa3
ED
/*
 * The seq_file position packs a hash-bucket index into the high bits
 * and a 1-based offset within that bucket into the low BUCKET_SPACE
 * bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - 1 - (UNIX_HASH_BITS + 1))

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
a53eb3fe 2749
7123aaa3 2750static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 2751{
7123aaa3
ED
2752 unsigned long offset = get_offset(*pos);
2753 unsigned long bucket = get_bucket(*pos);
2754 struct sock *sk;
2755 unsigned long count = 0;
1da177e4 2756
7123aaa3
ED
2757 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2758 if (sock_net(sk) != seq_file_net(seq))
097e66c5 2759 continue;
7123aaa3
ED
2760 if (++count == offset)
2761 break;
2762 }
2763
2764 return sk;
2765}
2766
2767static struct sock *unix_next_socket(struct seq_file *seq,
2768 struct sock *sk,
2769 loff_t *pos)
2770{
2771 unsigned long bucket;
2772
2773 while (sk > (struct sock *)SEQ_START_TOKEN) {
2774 sk = sk_next(sk);
2775 if (!sk)
2776 goto next_bucket;
2777 if (sock_net(sk) == seq_file_net(seq))
2778 return sk;
1da177e4 2779 }
7123aaa3
ED
2780
2781 do {
2782 sk = unix_from_bucket(seq, pos);
2783 if (sk)
2784 return sk;
2785
2786next_bucket:
2787 bucket = get_bucket(*pos) + 1;
2788 *pos = set_bucket_offset(bucket, 1);
2789 } while (bucket < ARRAY_SIZE(unix_socket_table));
2790
1da177e4
LT
2791 return NULL;
2792}
2793
1da177e4 2794static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 2795 __acquires(unix_table_lock)
1da177e4 2796{
fbe9cc4a 2797 spin_lock(&unix_table_lock);
7123aaa3
ED
2798
2799 if (!*pos)
2800 return SEQ_START_TOKEN;
2801
2802 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2803 return NULL;
2804
2805 return unix_next_socket(seq, NULL, pos);
1da177e4
LT
2806}
2807
2808static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2809{
2810 ++*pos;
7123aaa3 2811 return unix_next_socket(seq, v, pos);
1da177e4
LT
2812}
2813
2814static void unix_seq_stop(struct seq_file *seq, void *v)
9a429c49 2815 __releases(unix_table_lock)
1da177e4 2816{
fbe9cc4a 2817 spin_unlock(&unix_table_lock);
1da177e4
LT
2818}
2819
2820static int unix_seq_show(struct seq_file *seq, void *v)
2821{
ac7bfa62 2822
b9f3124f 2823 if (v == SEQ_START_TOKEN)
1da177e4
LT
2824 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2825 "Inode Path\n");
2826 else {
2827 struct sock *s = v;
2828 struct unix_sock *u = unix_sk(s);
1c92b4e5 2829 unix_state_lock(s);
1da177e4 2830
71338aa7 2831 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
1da177e4 2832 s,
41c6d650 2833 refcount_read(&s->sk_refcnt),
1da177e4
LT
2834 0,
2835 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2836 s->sk_type,
2837 s->sk_socket ?
2838 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2839 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2840 sock_i_ino(s));
2841
4fa1d0ff 2842 if (u->addr) { // under unix_table_lock here
1da177e4
LT
2843 int i, len;
2844 seq_putc(seq, ' ');
2845
2846 i = 0;
2847 len = u->addr->len - sizeof(short);
2848 if (!UNIX_ABSTRACT(s))
2849 len--;
2850 else {
2851 seq_putc(seq, '@');
2852 i++;
2853 }
2854 for ( ; i < len; i++)
e7947ea7
IB
2855 seq_putc(seq, u->addr->name->sun_path[i] ?:
2856 '@');
1da177e4 2857 }
1c92b4e5 2858 unix_state_unlock(s);
1da177e4
LT
2859 seq_putc(seq, '\n');
2860 }
2861
2862 return 0;
2863}
2864
56b3d975 2865static const struct seq_operations unix_seq_ops = {
1da177e4
LT
2866 .start = unix_seq_start,
2867 .next = unix_seq_next,
2868 .stop = unix_seq_stop,
2869 .show = unix_seq_show,
2870};
1da177e4
LT
2871#endif
2872
ec1b4cf7 2873static const struct net_proto_family unix_family_ops = {
1da177e4
LT
2874 .family = PF_UNIX,
2875 .create = unix_create,
2876 .owner = THIS_MODULE,
2877};
2878
097e66c5 2879
2c8c1e72 2880static int __net_init unix_net_init(struct net *net)
097e66c5
DL
2881{
2882 int error = -ENOMEM;
2883
a0a53c8b 2884 net->unx.sysctl_max_dgram_qlen = 10;
1597fbc0
PE
2885 if (unix_sysctl_register(net))
2886 goto out;
d392e497 2887
097e66c5 2888#ifdef CONFIG_PROC_FS
c3506372
CH
2889 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2890 sizeof(struct seq_net_private))) {
1597fbc0 2891 unix_sysctl_unregister(net);
097e66c5 2892 goto out;
1597fbc0 2893 }
097e66c5
DL
2894#endif
2895 error = 0;
2896out:
48dcc33e 2897 return error;
097e66c5
DL
2898}
2899
2c8c1e72 2900static void __net_exit unix_net_exit(struct net *net)
097e66c5 2901{
1597fbc0 2902 unix_sysctl_unregister(net);
ece31ffd 2903 remove_proc_entry("unix", net->proc_net);
097e66c5
DL
2904}
2905
2906static struct pernet_operations unix_net_ops = {
2907 .init = unix_net_init,
2908 .exit = unix_net_exit,
2909};
2910
1da177e4
LT
2911static int __init af_unix_init(void)
2912{
2913 int rc = -1;
1da177e4 2914
b4fff5f8 2915 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
1da177e4
LT
2916
2917 rc = proto_register(&unix_proto, 1);
ac7bfa62 2918 if (rc != 0) {
5cc208be 2919 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
1da177e4
LT
2920 goto out;
2921 }
2922
2923 sock_register(&unix_family_ops);
097e66c5 2924 register_pernet_subsys(&unix_net_ops);
1da177e4
LT
2925out:
2926 return rc;
2927}
2928
2929static void __exit af_unix_exit(void)
2930{
2931 sock_unregister(PF_UNIX);
1da177e4 2932 proto_unregister(&unix_proto);
097e66c5 2933 unregister_pernet_subsys(&unix_net_ops);
1da177e4
LT
2934}
2935
3d366960
DW
2936/* Earlier than device_initcall() so that other drivers invoking
2937 request_module() don't end up in a loop when modprobe tries
2938 to use a UNIX socket. But later than subsys_initcall() because
2939 we depend on stuff initialised there */
2940fs_initcall(af_unix_init);
1da177e4
LT
2941module_exit(af_unix_exit);
2942
2943MODULE_LICENSE("GPL");
2944MODULE_ALIAS_NETPROTO(PF_UNIX);