net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
   32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
   33  *                                      by the above two patches.
  34  *           Andrea Arcangeli   :       If possible we block in connect(2)
  35  *                                      if the max backlog of the listen socket
   36  *                                      has been reached. This won't break
  37  *                                      old apps and it will avoid huge amount
  38  *                                      of socks hashed (this for unix_gc()
  39  *                                      performances reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85 #include <linux/module.h>
  86 #include <linux/kernel.h>
  87 #include <linux/signal.h>
  88 #include <linux/sched/signal.h>
  89 #include <linux/errno.h>
  90 #include <linux/string.h>
  91 #include <linux/stat.h>
  92 #include <linux/dcache.h>
  93 #include <linux/namei.h>
  94 #include <linux/socket.h>
  95 #include <linux/un.h>
  96 #include <linux/fcntl.h>
  97 #include <linux/termios.h>
  98 #include <linux/sockios.h>
  99 #include <linux/net.h>
 100 #include <linux/in.h>
 101 #include <linux/fs.h>
 102 #include <linux/slab.h>
 103 #include <linux/uaccess.h>
 104 #include <linux/skbuff.h>
 105 #include <linux/netdevice.h>
 106 #include <net/net_namespace.h>
 107 #include <net/sock.h>
 108 #include <net/tcp_states.h>
 109 #include <net/af_unix.h>
 110 #include <linux/proc_fs.h>
 111 #include <linux/seq_file.h>
 112 #include <net/scm.h>
 113 #include <linux/init.h>
 114 #include <linux/poll.h>
 115 #include <linux/rtnetlink.h>
 116 #include <linux/mount.h>
 117 #include <net/checksum.h>
 118 #include <linux/security.h>
 119 #include <linux/freezer.h>
 120 #include <linux/file.h>
 121
 122 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 123 EXPORT_SYMBOL_GPL(unix_socket_table);
 124 DEFINE_SPINLOCK(unix_table_lock);
 125 EXPORT_SYMBOL_GPL(unix_table_lock);
 126 static atomic_long_t unix_nr_socks;
 127
 128
 129 static struct hlist_head *unix_sockets_unbound(void *addr)
 130 {
 131         unsigned long hash = (unsigned long)addr;
 132
 133         hash ^= hash >> 16;
 134         hash ^= hash >> 8;
 135         hash %= UNIX_HASH_SIZE;
 136         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 137 }
 138
 139 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 140
 141 #ifdef CONFIG_SECURITY_NETWORK
 142 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 143 {
 144         UNIXCB(skb).secid = scm->secid;
 145 }
 146
 147 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 148 {
 149         scm->secid = UNIXCB(skb).secid;
 150 }
 151
 152 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 153 {
 154         return (scm->secid == UNIXCB(skb).secid);
 155 }
 156 #else
 157 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158 { }
 159
 160 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 161 { }
 162
 163 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 164 {
 165         return true;
 166 }
 167 #endif /* CONFIG_SECURITY_NETWORK */
 168
 169 /*
 170  *  SMP locking strategy:
 171  *    hash table is protected with spinlock unix_table_lock
 172  *    each socket state is protected by separate spin lock.
 173  */
 174
 175 static inline unsigned int unix_hash_fold(__wsum n)
 176 {
 177         unsigned int hash = (__force unsigned int)csum_fold(n);
 178
 179         hash ^= hash>>8;
 180         return hash&(UNIX_HASH_SIZE-1);
 181 }
 182
 183 #define unix_peer(sk) (unix_sk(sk)->peer)
 184
 185 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 186 {
 187         return unix_peer(osk) == sk;
 188 }
 189
 190 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 191 {
 192         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 193 }
 194
 195 static inline int unix_recvq_full(struct sock const *sk)
 196 {
 197         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 198 }
 199
 200 struct sock *unix_peer_get(struct sock *s)
 201 {
 202         struct sock *peer;
 203
 204         unix_state_lock(s);
 205         peer = unix_peer(s);
 206         if (peer)
 207                 sock_hold(peer);
 208         unix_state_unlock(s);
 209         return peer;
 210 }
 211 EXPORT_SYMBOL_GPL(unix_peer_get);
 212
 213 static inline void unix_release_addr(struct unix_address *addr)
 214 {
 215         if (refcount_dec_and_test(&addr->refcnt))
 216                 kfree(addr);
 217 }
 218
 219 /*
 220  *      Check unix socket name:
 221  *              - should be not zero length.
 222  *              - if started by not zero, should be NULL terminated (FS object)
 223  *              - if started by zero, it is abstract name.
 224  */
 225
 226 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 227 {
 228         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 229                 return -EINVAL;
 230         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 231                 return -EINVAL;
 232         if (sunaddr->sun_path[0]) {
 233                 /*
 234                  * This may look like an off by one error but it is a bit more
 235                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 236                  * sun_path[108] doesn't as such exist.  However in kernel space
 237                  * we are guaranteed that it is a valid memory location in our
 238                  * kernel address buffer.
 239                  */
 240                 ((char *)sunaddr)[len] = 0;
 241                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 242                 return len;
 243         }
 244
 245         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 246         return len;
 247 }
 248
 249 static void __unix_remove_socket(struct sock *sk)
 250 {
 251         sk_del_node_init(sk);
 252 }
 253
 254 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 255 {
 256         WARN_ON(!sk_unhashed(sk));
 257         sk_add_node(sk, list);
 258 }
 259
 260 static inline void unix_remove_socket(struct sock *sk)
 261 {
 262         spin_lock(&unix_table_lock);
 263         __unix_remove_socket(sk);
 264         spin_unlock(&unix_table_lock);
 265 }
 266
 267 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 268 {
 269         spin_lock(&unix_table_lock);
 270         __unix_insert_socket(list, sk);
 271         spin_unlock(&unix_table_lock);
 272 }
 273
 274 static struct sock *__unix_find_socket_byname(struct net *net,
 275                                               struct sockaddr_un *sunname,
 276                                               int len, int type, unsigned int hash)
 277 {
 278         struct sock *s;
 279
 280         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 281                 struct unix_sock *u = unix_sk(s);
 282
 283                 if (!net_eq(sock_net(s), net))
 284                         continue;
 285
 286                 if (u->addr->len == len &&
 287                     !memcmp(u->addr->name, sunname, len))
 288                         goto found;
 289         }
 290         s = NULL;
 291 found:
 292         return s;
 293 }
 294
 295 static inline struct sock *unix_find_socket_byname(struct net *net,
 296                                                    struct sockaddr_un *sunname,
 297                                                    int len, int type,
 298                                                    unsigned int hash)
 299 {
 300         struct sock *s;
 301
 302         spin_lock(&unix_table_lock);
 303         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 304         if (s)
 305                 sock_hold(s);
 306         spin_unlock(&unix_table_lock);
 307         return s;
 308 }
 309
 310 static struct sock *unix_find_socket_byinode(struct inode *i)
 311 {
 312         struct sock *s;
 313
 314         spin_lock(&unix_table_lock);
 315         sk_for_each(s,
 316                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 317                 struct dentry *dentry = unix_sk(s)->path.dentry;
 318
 319                 if (dentry && d_backing_inode(dentry) == i) {
 320                         sock_hold(s);
 321                         goto found;
 322                 }
 323         }
 324         s = NULL;
 325 found:
 326         spin_unlock(&unix_table_lock);
 327         return s;
 328 }
 329
 330 /* Support code for asymmetrically connected dgram sockets
 331  *
 332  * If a datagram socket is connected to a socket not itself connected
 333  * to the first socket (eg, /dev/log), clients may only enqueue more
 334  * messages if the present receive queue of the server socket is not
 335  * "too large". This means there's a second writeability condition
 336  * poll and sendmsg need to test. The dgram recv code will do a wake
 337  * up on the peer_wait wait queue of a socket upon reception of a
 338  * datagram which needs to be propagated to sleeping would-be writers
 339  * since these might not have sent anything so far. This can't be
 340  * accomplished via poll_wait because the lifetime of the server
 341  * socket might be less than that of its clients if these break their
 342  * association with it or if the server socket is closed while clients
 343  * are still connected to it and there's no way to inform "a polling
 344  * implementation" that it should let go of a certain wait queue
 345  *
 346  * In order to propagate a wake up, a wait_queue_entry_t of the client
 347  * socket is enqueued on the peer_wait queue of the server socket
 348  * whose wake function does a wake_up on the ordinary client socket
 349  * wait queue. This connection is established whenever a write (or
 350  * poll for write) hit the flow control condition and broken when the
 351  * association to the server socket is dissolved or after a wake up
 352  * was relayed.
 353  */
 354
 355 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 356                                       void *key)
 357 {
 358         struct unix_sock *u;
 359         wait_queue_head_t *u_sleep;
 360
 361         u = container_of(q, struct unix_sock, peer_wake);
 362
 363         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 364                             q);
 365         u->peer_wake.private = NULL;
 366
 367         /* relaying can only happen while the wq still exists */
 368         u_sleep = sk_sleep(&u->sk);
 369         if (u_sleep)
 370                 wake_up_interruptible_poll(u_sleep, key);
 371
 372         return 0;
 373 }
 374
 375 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 376 {
 377         struct unix_sock *u, *u_other;
 378         int rc;
 379
 380         u = unix_sk(sk);
 381         u_other = unix_sk(other);
 382         rc = 0;
 383         spin_lock(&u_other->peer_wait.lock);
 384
 385         if (!u->peer_wake.private) {
 386                 u->peer_wake.private = other;
 387                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 388
 389                 rc = 1;
 390         }
 391
 392         spin_unlock(&u_other->peer_wait.lock);
 393         return rc;
 394 }
 395
 396 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 397                                             struct sock *other)
 398 {
 399         struct unix_sock *u, *u_other;
 400
 401         u = unix_sk(sk);
 402         u_other = unix_sk(other);
 403         spin_lock(&u_other->peer_wait.lock);
 404
 405         if (u->peer_wake.private == other) {
 406                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 407                 u->peer_wake.private = NULL;
 408         }
 409
 410         spin_unlock(&u_other->peer_wait.lock);
 411 }
 412
 413 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 414                                                    struct sock *other)
 415 {
 416         unix_dgram_peer_wake_disconnect(sk, other);
 417         wake_up_interruptible_poll(sk_sleep(sk),
 418                                    POLLOUT |
 419                                    POLLWRNORM |
 420                                    POLLWRBAND);
 421 }
 422
 423 /* preconditions:
 424  *      - unix_peer(sk) == other
 425  *      - association is stable
 426  */
 427 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 428 {
 429         int connected;
 430
 431         connected = unix_dgram_peer_wake_connect(sk, other);
 432
 433         if (unix_recvq_full(other))
 434                 return 1;
 435
 436         if (connected)
 437                 unix_dgram_peer_wake_disconnect(sk, other);
 438
 439         return 0;
 440 }
 441
 442 static int unix_writable(const struct sock *sk)
 443 {
 444         return sk->sk_state != TCP_LISTEN &&
 445                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 446 }
 447
 448 static void unix_write_space(struct sock *sk)
 449 {
 450         struct socket_wq *wq;
 451
 452         rcu_read_lock();
 453         if (unix_writable(sk)) {
 454                 wq = rcu_dereference(sk->sk_wq);
 455                 if (skwq_has_sleeper(wq))
 456                         wake_up_interruptible_sync_poll(&wq->wait,
 457                                 POLLOUT | POLLWRNORM | POLLWRBAND);
 458                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 459         }
 460         rcu_read_unlock();
 461 }
 462
 463 /* When dgram socket disconnects (or changes its peer), we clear its receive
 464  * queue of packets arrived from previous peer. First, it allows to do
 465  * flow control based only on wmem_alloc; second, sk connected to peer
 466  * may receive messages only from that peer. */
 467 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 468 {
 469         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 470                 skb_queue_purge(&sk->sk_receive_queue);
 471                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 472
 473                 /* If one link of bidirectional dgram pipe is disconnected,
 474                  * we signal error. Messages are lost. Do not make this,
 475                  * when peer was not connected to us.
 476                  */
 477                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 478                         other->sk_err = ECONNRESET;
 479                         other->sk_error_report(other);
 480                 }
 481         }
 482 }
 483
 484 static void unix_sock_destructor(struct sock *sk)
 485 {
 486         struct unix_sock *u = unix_sk(sk);
 487
 488         skb_queue_purge(&sk->sk_receive_queue);
 489
 490         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 491         WARN_ON(!sk_unhashed(sk));
 492         WARN_ON(sk->sk_socket);
 493         if (!sock_flag(sk, SOCK_DEAD)) {
 494                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 495                 return;
 496         }
 497
 498         if (u->addr)
 499                 unix_release_addr(u->addr);
 500
 501         atomic_long_dec(&unix_nr_socks);
 502         local_bh_disable();
 503         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 504         local_bh_enable();
 505 #ifdef UNIX_REFCNT_DEBUG
 506         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 507                 atomic_long_read(&unix_nr_socks));
 508 #endif
 509 }
 510
 511 static void unix_release_sock(struct sock *sk, int embrion)
 512 {
 513         struct unix_sock *u = unix_sk(sk);
 514         struct path path;
 515         struct sock *skpair;
 516         struct sk_buff *skb;
 517         int state;
 518
 519         unix_remove_socket(sk);
 520
 521         /* Clear state */
 522         unix_state_lock(sk);
 523         sock_orphan(sk);
 524         sk->sk_shutdown = SHUTDOWN_MASK;
 525         path         = u->path;
 526         u->path.dentry = NULL;
 527         u->path.mnt = NULL;
 528         state = sk->sk_state;
 529         sk->sk_state = TCP_CLOSE;
 530         unix_state_unlock(sk);
 531
 532         wake_up_interruptible_all(&u->peer_wait);
 533
 534         skpair = unix_peer(sk);
 535
 536         if (skpair != NULL) {
 537                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 538                         unix_state_lock(skpair);
 539                         /* No more writes */
 540                         skpair->sk_shutdown = SHUTDOWN_MASK;
 541                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 542                                 skpair->sk_err = ECONNRESET;
 543                         unix_state_unlock(skpair);
 544                         skpair->sk_state_change(skpair);
 545                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 546                 }
 547
 548                 unix_dgram_peer_wake_disconnect(sk, skpair);
 549                 sock_put(skpair); /* It may now die */
 550                 unix_peer(sk) = NULL;
 551         }
 552
 553         /* Try to flush out this socket. Throw out buffers at least */
 554
 555         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 556                 if (state == TCP_LISTEN)
 557                         unix_release_sock(skb->sk, 1);
 558                 /* passed fds are erased in the kfree_skb hook        */
 559                 UNIXCB(skb).consumed = skb->len;
 560                 kfree_skb(skb);
 561         }
 562
 563         if (path.dentry)
 564                 path_put(&path);
 565
 566         sock_put(sk);
 567
 568         /* ---- Socket is dead now and most probably destroyed ---- */
 569
 570         /*
 571          * Fixme: BSD difference: In BSD all sockets connected to us get
 572          *        ECONNRESET and we die on the spot. In Linux we behave
 573          *        like files and pipes do and wait for the last
 574          *        dereference.
 575          *
 576          * Can't we simply set sock->err?
 577          *
 578          *        What the above comment does talk about? --ANK(980817)
 579          */
 580
 581         if (unix_tot_inflight)
 582                 unix_gc();              /* Garbage collect fds */
 583 }
 584
 585 static void init_peercred(struct sock *sk)
 586 {
 587         put_pid(sk->sk_peer_pid);
 588         if (sk->sk_peer_cred)
 589                 put_cred(sk->sk_peer_cred);
 590         sk->sk_peer_pid  = get_pid(task_tgid(current));
 591         sk->sk_peer_cred = get_current_cred();
 592 }
 593
 594 static void copy_peercred(struct sock *sk, struct sock *peersk)
 595 {
 596         put_pid(sk->sk_peer_pid);
 597         if (sk->sk_peer_cred)
 598                 put_cred(sk->sk_peer_cred);
 599         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 600         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 601 }
 602
 603 static int unix_listen(struct socket *sock, int backlog)
 604 {
 605         int err;
 606         struct sock *sk = sock->sk;
 607         struct unix_sock *u = unix_sk(sk);
 608         struct pid *old_pid = NULL;
 609
 610         err = -EOPNOTSUPP;
 611         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 612                 goto out;       /* Only stream/seqpacket sockets accept */
 613         err = -EINVAL;
 614         if (!u->addr)
 615                 goto out;       /* No listens on an unbound socket */
 616         unix_state_lock(sk);
 617         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 618                 goto out_unlock;
 619         if (backlog > sk->sk_max_ack_backlog)
 620                 wake_up_interruptible_all(&u->peer_wait);
 621         sk->sk_max_ack_backlog  = backlog;
 622         sk->sk_state            = TCP_LISTEN;
 623         /* set credentials so connect can copy them */
 624         init_peercred(sk);
 625         err = 0;
 626
 627 out_unlock:
 628         unix_state_unlock(sk);
 629         put_pid(old_pid);
 630 out:
 631         return err;
 632 }
 633
 634 static int unix_release(struct socket *);
 635 static int unix_bind(struct socket *, struct sockaddr *, int);
 636 static int unix_stream_connect(struct socket *, struct sockaddr *,
 637                                int addr_len, int flags);
 638 static int unix_socketpair(struct socket *, struct socket *);
 639 static int unix_accept(struct socket *, struct socket *, int, bool);
 640 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 641 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 642 static unsigned int unix_dgram_poll(struct file *, struct socket *,
 643                                     poll_table *);
 644 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 645 static int unix_shutdown(struct socket *, int);
 646 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 647 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 648 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 649                                     size_t size, int flags);
 650 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 651                                        struct pipe_inode_info *, size_t size,
 652                                        unsigned int flags);
 653 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 654 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 655 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 656                               int, int);
 657 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 658 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 659                                   int);
 660
 661 static int unix_set_peek_off(struct sock *sk, int val)
 662 {
 663         struct unix_sock *u = unix_sk(sk);
 664
 665         if (mutex_lock_interruptible(&u->iolock))
 666                 return -EINTR;
 667
 668         sk->sk_peek_off = val;
 669         mutex_unlock(&u->iolock);
 670
 671         return 0;
 672 }
 673
 674
 675 static const struct proto_ops unix_stream_ops = {
 676         .family =       PF_UNIX,
 677         .owner =        THIS_MODULE,
 678         .release =      unix_release,
 679         .bind =         unix_bind,
 680         .connect =      unix_stream_connect,
 681         .socketpair =   unix_socketpair,
 682         .accept =       unix_accept,
 683         .getname =      unix_getname,
 684         .poll =         unix_poll,
 685         .ioctl =        unix_ioctl,
 686         .listen =       unix_listen,
 687         .shutdown =     unix_shutdown,
 688         .setsockopt =   sock_no_setsockopt,
 689         .getsockopt =   sock_no_getsockopt,
 690         .sendmsg =      unix_stream_sendmsg,
 691         .recvmsg =      unix_stream_recvmsg,
 692         .mmap =         sock_no_mmap,
 693         .sendpage =     unix_stream_sendpage,
 694         .splice_read =  unix_stream_splice_read,
 695         .set_peek_off = unix_set_peek_off,
 696 };
 697
 698 static const struct proto_ops unix_dgram_ops = {
 699         .family =       PF_UNIX,
 700         .owner =        THIS_MODULE,
 701         .release =      unix_release,
 702         .bind =         unix_bind,
 703         .connect =      unix_dgram_connect,
 704         .socketpair =   unix_socketpair,
 705         .accept =       sock_no_accept,
 706         .getname =      unix_getname,
 707         .poll =         unix_dgram_poll,
 708         .ioctl =        unix_ioctl,
 709         .listen =       sock_no_listen,
 710         .shutdown =     unix_shutdown,
 711         .setsockopt =   sock_no_setsockopt,
 712         .getsockopt =   sock_no_getsockopt,
 713         .sendmsg =      unix_dgram_sendmsg,
 714         .recvmsg =      unix_dgram_recvmsg,
 715         .mmap =         sock_no_mmap,
 716         .sendpage =     sock_no_sendpage,
 717         .set_peek_off = unix_set_peek_off,
 718 };
 719
 720 static const struct proto_ops unix_seqpacket_ops = {
 721         .family =       PF_UNIX,
 722         .owner =        THIS_MODULE,
 723         .release =      unix_release,
 724         .bind =         unix_bind,
 725         .connect =      unix_stream_connect,
 726         .socketpair =   unix_socketpair,
 727         .accept =       unix_accept,
 728         .getname =      unix_getname,
 729         .poll =         unix_dgram_poll,
 730         .ioctl =        unix_ioctl,
 731         .listen =       unix_listen,
 732         .shutdown =     unix_shutdown,
 733         .setsockopt =   sock_no_setsockopt,
 734         .getsockopt =   sock_no_getsockopt,
 735         .sendmsg =      unix_seqpacket_sendmsg,
 736         .recvmsg =      unix_seqpacket_recvmsg,
 737         .mmap =         sock_no_mmap,
 738         .sendpage =     sock_no_sendpage,
 739         .set_peek_off = unix_set_peek_off,
 740 };
 741
 742 static struct proto unix_proto = {
 743         .name                   = "UNIX",
 744         .owner                  = THIS_MODULE,
 745         .obj_size               = sizeof(struct unix_sock),
 746 };
 747
 748 /*
 749  * AF_UNIX sockets do not interact with hardware, hence they
 750  * dont trigger interrupts - so it's safe for them to have
 751  * bh-unsafe locking for their sk_receive_queue.lock. Split off
 752  * this special lock-class by reinitializing the spinlock key:
 753  */
 754 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 755
 756 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 757 {
 758         struct sock *sk = NULL;
 759         struct unix_sock *u;
 760
 761         atomic_long_inc(&unix_nr_socks);
 762         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 763                 goto out;
 764
 765         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 766         if (!sk)
 767                 goto out;
 768
 769         sock_init_data(sock, sk);
 770         lockdep_set_class(&sk->sk_receive_queue.lock,
 771                                 &af_unix_sk_receive_queue_lock_key);
 772
 773         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 774         sk->sk_write_space      = unix_write_space;
 775         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 776         sk->sk_destruct         = unix_sock_destructor;
 777         u         = unix_sk(sk);
 778         u->path.dentry = NULL;
 779         u->path.mnt = NULL;
 780         spin_lock_init(&u->lock);
 781         atomic_long_set(&u->inflight, 0);
 782         INIT_LIST_HEAD(&u->link);
 783         mutex_init(&u->iolock); /* single task reading lock */
 784         mutex_init(&u->bindlock); /* single task binding lock */
 785         init_waitqueue_head(&u->peer_wait);
 786         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 787         unix_insert_socket(unix_sockets_unbound(sk), sk);
 788 out:
 789         if (sk == NULL)
 790                 atomic_long_dec(&unix_nr_socks);
 791         else {
 792                 local_bh_disable();
 793                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 794                 local_bh_enable();
 795         }
 796         return sk;
 797 }
 798
 799 static int unix_create(struct net *net, struct socket *sock, int protocol,
 800                        int kern)
 801 {
 802         if (protocol && protocol != PF_UNIX)
 803                 return -EPROTONOSUPPORT;
 804
 805         sock->state = SS_UNCONNECTED;
 806
 807         switch (sock->type) {
 808         case SOCK_STREAM:
 809                 sock->ops = &unix_stream_ops;
 810                 break;
 811                 /*
 812                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 813                  *      nothing uses it.
 814                  */
 815         case SOCK_RAW:
 816                 sock->type = SOCK_DGRAM;
 817         case SOCK_DGRAM:
 818                 sock->ops = &unix_dgram_ops;
 819                 break;
 820         case SOCK_SEQPACKET:
 821                 sock->ops = &unix_seqpacket_ops;
 822                 break;
 823         default:
 824                 return -ESOCKTNOSUPPORT;
 825         }
 826
 827         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 828 }
 829
 830 static int unix_release(struct socket *sock)
 831 {
 832         struct sock *sk = sock->sk;
 833
 834         if (!sk)
 835                 return 0;
 836
 837         unix_release_sock(sk, 0);
 838         sock->sk = NULL;
 839
 840         return 0;
 841 }
 842
 843 static int unix_autobind(struct socket *sock)
 844 {
 845         struct sock *sk = sock->sk;
 846         struct net *net = sock_net(sk);
 847         struct unix_sock *u = unix_sk(sk);
 848         static u32 ordernum = 1;
 849         struct unix_address *addr;
 850         int err;
 851         unsigned int retries = 0;
 852
 853         err = mutex_lock_interruptible(&u->bindlock);
 854         if (err)
 855                 return err;
 856
 857         err = 0;
 858         if (u->addr)
 859                 goto out;
 860
 861         err = -ENOMEM;
 862         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 863         if (!addr)
 864                 goto out;
 865
 866         addr->name->sun_family = AF_UNIX;
 867         refcount_set(&addr->refcnt, 1);
 868
 869 retry:
 870         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 871         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 872
 873         spin_lock(&unix_table_lock);
 874         ordernum = (ordernum+1)&0xFFFFF;
 875
 876         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 877                                       addr->hash)) {
 878                 spin_unlock(&unix_table_lock);
 879                 /*
 880                  * __unix_find_socket_byname() may take long time if many names
 881                  * are already in use.
 882                  */
 883                 cond_resched();
 884                 /* Give up if all names seems to be in use. */
 885                 if (retries++ == 0xFFFFF) {
 886                         err = -ENOSPC;
 887                         kfree(addr);
 888                         goto out;
 889                 }
 890                 goto retry;
 891         }
 892         addr->hash ^= sk->sk_type;
 893
 894         __unix_remove_socket(sk);
 895         u->addr = addr;
 896         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 897         spin_unlock(&unix_table_lock);
 898         err = 0;
 899
 900 out:    mutex_unlock(&u->bindlock);
 901         return err;
 902 }
 903
 904 static struct sock *unix_find_other(struct net *net,
 905                                     struct sockaddr_un *sunname, int len,
 906                                     int type, unsigned int hash, int *error)
 907 {
 908         struct sock *u;
 909         struct path path;
 910         int err = 0;
 911
 912         if (sunname->sun_path[0]) {
 913                 struct inode *inode;
 914                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 915                 if (err)
 916                         goto fail;
 917                 inode = d_backing_inode(path.dentry);
 918                 err = inode_permission(inode, MAY_WRITE);
 919                 if (err)
 920                         goto put_fail;
 921
 922                 err = -ECONNREFUSED;
 923                 if (!S_ISSOCK(inode->i_mode))
 924                         goto put_fail;
 925                 u = unix_find_socket_byinode(inode);
 926                 if (!u)
 927                         goto put_fail;
 928
 929                 if (u->sk_type == type)
 930                         touch_atime(&path);
 931
 932                 path_put(&path);
 933
 934                 err = -EPROTOTYPE;
 935                 if (u->sk_type != type) {
 936                         sock_put(u);
 937                         goto fail;
 938                 }
 939         } else {
 940                 err = -ECONNREFUSED;
 941                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 942                 if (u) {
 943                         struct dentry *dentry;
 944                         dentry = unix_sk(u)->path.dentry;
 945                         if (dentry)
 946                                 touch_atime(&unix_sk(u)->path);
 947                 } else
 948                         goto fail;
 949         }
 950         return u;
 951
 952 put_fail:
 953         path_put(&path);
 954 fail:
 955         *error = err;
 956         return NULL;
 957 }
 958
 959 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 960 {
 961         struct dentry *dentry;
 962         struct path path;
 963         int err = 0;
 964         /*
 965          * Get the parent directory, calculate the hash for last
 966          * component.
 967          */
 968         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 969         err = PTR_ERR(dentry);
 970         if (IS_ERR(dentry))
 971                 return err;
 972
 973         /*
 974          * All right, let's create it.
 975          */
 976         err = security_path_mknod(&path, dentry, mode, 0);
 977         if (!err) {
 978                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 979                 if (!err) {
 980                         res->mnt = mntget(path.mnt);
 981                         res->dentry = dget(dentry);
 982                 }
 983         }
 984         done_path_create(&path, dentry);
 985         return err;
 986 }
 987
 /*
  * Bind @sock to the AF_UNIX address @uaddr of @addr_len bytes.
  *
  * An address that is only as long as the family field
  * (addr_len == sizeof(short)) is handed to unix_autobind().  A name whose
  * sun_path starts with a non-zero byte gets a filesystem node through
  * unix_mknod(); any other name is checked for uniqueness with
  * __unix_find_socket_byname() under unix_table_lock.
  *
  * Returns 0 on success or a negative errno: -EINVAL for a too-short or
  * non-AF_UNIX address and for a socket that already has an address,
  * -EADDRINUSE when the name is taken (-EEXIST from unix_mknod() is mapped
  * to it), -ENOMEM when the address copy cannot be allocated, or the error
  * returned by unix_mkname(), unix_mknod(), unix_autobind() or
  * mutex_lock_interruptible().
  */
 988 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 989 {
 990         struct sock *sk = sock->sk;
 991         struct net *net = sock_net(sk);
 992         struct unix_sock *u = unix_sk(sk);
 993         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 994         char *sun_path = sunaddr->sun_path;
 995         int err;
 996         unsigned int hash;
 997         struct unix_address *addr;
 998         struct hlist_head *list;
 999         struct path path = { };
1000
1001         err = -EINVAL;
1002         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1003             sunaddr->sun_family != AF_UNIX)
1004                 goto out;
1005
             /* Only the family was supplied: pick a name automatically. */
1006         if (addr_len == sizeof(short)) {
1007                 err = unix_autobind(sock);
1008                 goto out;
1009         }
1010
             /* A non-negative return from unix_mkname() becomes the address length. */
1011         err = unix_mkname(sunaddr, addr_len, &hash);
1012         if (err < 0)
1013                 goto out;
1014         addr_len = err;
1015
             /*
              * NOTE(review): the node is created before bindlock is taken.
              * The failure paths below only path_put() the reference; nothing
              * visible in this function removes the node again.
              */
1016         if (sun_path[0]) {
1017                 umode_t mode = S_IFSOCK |
1018                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1019                 err = unix_mknod(sun_path, mode, &path);
1020                 if (err) {
1021                         if (err == -EEXIST)
1022                                 err = -EADDRINUSE;
1023                         goto out;
1024                 }
1025         }
1026
1027         err = mutex_lock_interruptible(&u->bindlock);
1028         if (err)
1029                 goto out_put;
1030
             /* A socket that already has an address cannot be bound again. */
1031         err = -EINVAL;
1032         if (u->addr)
1033                 goto out_up;
1034
1035         err = -ENOMEM;
1036         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1037         if (!addr)
1038                 goto out_up;
1039
1040         memcpy(addr->name, sunaddr, addr_len);
1041         addr->len = addr_len;
1042         addr->hash = hash ^ sk->sk_type;
1043         refcount_set(&addr->refcnt, 1);
1044
1045         if (sun_path[0]) {
                     /*
                      * Filesystem name: addr->hash is overwritten with
                      * UNIX_HASH_SIZE and the bucket is chosen from the
                      * inode number instead of the name hash.
                      */
1046                 addr->hash = UNIX_HASH_SIZE;
1047                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1048                 spin_lock(&unix_table_lock);
1049                 u->path = path;
1050                 list = &unix_socket_table[hash];
1051         } else {
                     /* Other names must be unique among sockets of this type. */
1052                 spin_lock(&unix_table_lock);
1053                 err = -EADDRINUSE;
1054                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1055                                               sk->sk_type, hash)) {
1056                         unix_release_addr(addr);
1057                         goto out_unlock;
1058                 }
1059
1060                 list = &unix_socket_table[addr->hash];
1061         }
1062
             /* Move the socket to its new bucket; unix_table_lock is held. */
1063         err = 0;
1064         __unix_remove_socket(sk);
1065         u->addr = addr;
1066         __unix_insert_socket(list, sk);
1067
1068 out_unlock:
1069         spin_unlock(&unix_table_lock);
1070 out_up:
1071         mutex_unlock(&u->bindlock);
1072 out_put:
             /*
              * On success the references in path now belong to u->path.
              * On failure they are dropped; path is still zero-initialised
              * when unix_mknod() was not called -- presumably path_put()
              * tolerates that, TODO confirm.
              */
1073         if (err)
1074                 path_put(&path);
1075 out:
1076         return err;
1077 }
1078
/*
 * Take the state locks of @sk1 and @sk2.  The socket with the lower
 * address is locked first and the other with unix_state_lock_nested().
 * When @sk2 is NULL or equal to @sk1 only @sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first;
        struct sock *second;

        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1093
1094 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1095 {
1096         if (unlikely(sk1 == sk2) || !sk2) {
1097                 unix_state_unlock(sk1);
1098                 return;
1099         }
1100         unix_state_unlock(sk1);
1101         unix_state_unlock(sk2);
1102 }
1103
/*
 * Set the peer of @sock to the socket named by @addr, or clear the peer
 * when @addr has family AF_UNSPEC.
 *
 * For a real address the name is normalised with unix_mkname(), the socket
 * is autobound first if SOCK_PASSCRED is set and it has no address, and the
 * target is looked up with unix_find_other().  Both state locks are then
 * taken and the target must pass unix_may_send() and
 * security_unix_may_send().
 *
 * On success the reference obtained from unix_find_other() is not dropped;
 * it stays with unix_peer(sk).  The reference on a previous peer is
 * released with sock_put().
 *
 * Returns 0 on success or a negative errno: -EINVAL when @alen does not
 * cover sa_family, -EPERM when unix_may_send() refuses, or the error from
 * unix_mkname(), unix_autobind(), unix_find_other() or
 * security_unix_may_send().
 */
1104 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1105                               int alen, int flags)
1106 {
1107         struct sock *sk = sock->sk;
1108         struct net *net = sock_net(sk);
1109         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1110         struct sock *other;
1111         unsigned int hash;
1112         int err;
1113
1114         err = -EINVAL;
1115         if (alen < offsetofend(struct sockaddr, sa_family))
1116                 goto out;
1117
1118         if (addr->sa_family != AF_UNSPEC) {
1119                 err = unix_mkname(sunaddr, alen, &hash);
1120                 if (err < 0)
1121                         goto out;
1122                 alen = err;
1123
1124                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1125                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1126                         goto out;
1127
1128 restart:
1129                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1130                 if (!other)
1131                         goto out;
1132
1133                 unix_state_double_lock(sk, other);
1134
1135                 /* Apparently VFS overslept socket death. Retry. */
1136                 if (sock_flag(other, SOCK_DEAD)) {
1137                         unix_state_double_unlock(sk, other);
1138                         sock_put(other);
1139                         goto restart;
1140                 }
1141
1142                 err = -EPERM;
1143                 if (!unix_may_send(sk, other))
1144                         goto out_unlock;
1145
1146                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1147                 if (err)
1148                         goto out_unlock;
1149
1150         } else {
1151                 /*
1152                  *      1003.1g breaking connected state with AF_UNSPEC
1153                  */
                     /* With other == NULL the double lock only takes sk's lock. */
1154                 other = NULL;
1155                 unix_state_double_lock(sk, other);
1156         }
1157
1158         /*
1159          * If it was connected, reconnect.
1160          */
1161         if (unix_peer(sk)) {
1162                 struct sock *old_peer = unix_peer(sk);
1163                 unix_peer(sk) = other;
1164                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1165
1166                 unix_state_double_unlock(sk, other);
1167
                     /*
                      * The disconnect handling and the final put of the old
                      * peer run after both state locks have been released.
                      */
1168                 if (other != old_peer)
1169                         unix_dgram_disconnected(sk, old_peer);
1170                 sock_put(old_peer);
1171         } else {
1172                 unix_peer(sk) = other;
1173                 unix_state_double_unlock(sk, other);
1174         }
1175         return 0;
1176
             /* Only reached from the branch above where other is non-NULL. */
1177 out_unlock:
1178         unix_state_double_unlock(sk, other);
1179         sock_put(other);
1180 out:
1181         return err;
1182 }
1183
1184 static long unix_wait_for_peer(struct sock *other, long timeo)
1185 {
1186         struct unix_sock *u = unix_sk(other);
1187         int sched;
1188         DEFINE_WAIT(wait);
1189
1190         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1191
1192         sched = !sock_flag(other, SOCK_DEAD) &&
1193                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1194                 unix_recvq_full(other);
1195
1196         unix_state_unlock(other);
1197
1198         if (sched)
1199                 timeo = schedule_timeout(timeo);
1200
1201         finish_wait(&u->peer_wait, &wait);
1202         return timeo;
1203 }
1204
/*
 * Connect @sock to the listening socket named by @uaddr.
 *
 * A new sock (newsk) and a one-byte skb are allocated up front.  Once the
 * listener has been found and checked, newsk is made the peer of sk and the
 * skb is queued on the listener's receive queue; unix_accept() later takes
 * the new sock from skb->sk.
 *
 * Returns 0 on success or a negative errno: -ENOMEM when newsk or the skb
 * cannot be allocated, -ECONNREFUSED when the target is not listening or
 * has shut down receiving, -EAGAIN when its queue is full and no timeout is
 * available, sock_intr_errno() when a signal is pending after waiting,
 * -EISCONN / -EINVAL depending on sk's own state, or the error from
 * unix_mkname(), unix_autobind(), unix_find_other() or
 * security_unix_stream_connect().
 */
1205 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1206                                int addr_len, int flags)
1207 {
1208         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1209         struct sock *sk = sock->sk;
1210         struct net *net = sock_net(sk);
1211         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1212         struct sock *newsk = NULL;
1213         struct sock *other = NULL;
1214         struct sk_buff *skb = NULL;
1215         unsigned int hash;
1216         int st;
1217         int err;
1218         long timeo;
1219
1220         err = unix_mkname(sunaddr, addr_len, &hash);
1221         if (err < 0)
1222                 goto out;
1223         addr_len = err;
1224
1225         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1226             (err = unix_autobind(sock)) != 0)
1227                 goto out;
1228
1229         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1230
1231         /* First of all allocate resources.
1232            If we will make it after state is locked,
1233            we will have to recheck all again in any case.
1234          */
1235
1236         err = -ENOMEM;
1237
1238         /* create new sock for complete connection */
1239         newsk = unix_create1(sock_net(sk), NULL, 0);
1240         if (newsk == NULL)
1241                 goto out;
1242
1243         /* Allocate skb for sending to listening sock */
1244         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1245         if (skb == NULL)
1246                 goto out;
1247
1248 restart:
1249         /*  Find listening sock. */
1250         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1251         if (!other)
1252                 goto out;
1253
1254         /* Latch state of peer */
1255         unix_state_lock(other);
1256
1257         /* Apparently VFS overslept socket death. Retry. */
1258         if (sock_flag(other, SOCK_DEAD)) {
1259                 unix_state_unlock(other);
1260                 sock_put(other);
1261                 goto restart;
1262         }
1263
1264         err = -ECONNREFUSED;
1265         if (other->sk_state != TCP_LISTEN)
1266                 goto out_unlock;
1267         if (other->sk_shutdown & RCV_SHUTDOWN)
1268                 goto out_unlock;
1269
1270         if (unix_recvq_full(other)) {
1271                 err = -EAGAIN;
1272                 if (!timeo)
1273                         goto out_unlock;
1274
                     /*
                      * unix_wait_for_peer() releases other's state lock, so
                      * the signal case below leaves through out rather than
                      * out_unlock.
                      */
1275                 timeo = unix_wait_for_peer(other, timeo);
1276
1277                 err = sock_intr_errno(timeo);
1278                 if (signal_pending(current))
1279                         goto out;
1280                 sock_put(other);
1281                 goto restart;
1282         }
1283
1284         /* Latch our state.
1285
1286            It is tricky place. We need to grab our state lock and cannot
1287            drop lock on peer. It is dangerous because deadlock is
1288            possible. Connect to self case and simultaneous
1289            attempt to connect are eliminated by checking socket
1290            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1291            check this before attempt to grab lock.
1292
1293            Well, and we have to recheck the state after socket locked.
1294          */
1295         st = sk->sk_state;
1296
1297         switch (st) {
1298         case TCP_CLOSE:
1299                 /* This is ok... continue with connect */
1300                 break;
1301         case TCP_ESTABLISHED:
1302                 /* Socket is already connected */
1303                 err = -EISCONN;
1304                 goto out_unlock;
1305         default:
1306                 err = -EINVAL;
1307                 goto out_unlock;
1308         }
1309
1310         unix_state_lock_nested(sk);
1311
             /* sk's state changed between the unlocked read and taking its lock. */
1312         if (sk->sk_state != st) {
1313                 unix_state_unlock(sk);
1314                 unix_state_unlock(other);
1315                 sock_put(other);
1316                 goto restart;
1317         }
1318
1319         err = security_unix_stream_connect(sk, other, newsk);
1320         if (err) {
1321                 unix_state_unlock(sk);
1322                 goto out_unlock;
1323         }
1324
1325         /* The way is open! Fastly set all the necessary fields... */
1326
             /* newsk holds a reference on sk for as long as sk is its peer. */
1327         sock_hold(sk);
1328         unix_peer(newsk)        = sk;
1329         newsk->sk_state         = TCP_ESTABLISHED;
1330         newsk->sk_type          = sk->sk_type;
1331         init_peercred(newsk);
1332         newu = unix_sk(newsk);
1333         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1334         otheru = unix_sk(other);
1335
1336         /* copy address information from listening to new sock*/
1337         if (otheru->addr) {
1338                 refcount_inc(&otheru->addr->refcnt);
1339                 newu->addr = otheru->addr;
1340         }
1341         if (otheru->path.dentry) {
1342                 path_get(&otheru->path);
1343                 newu->path = otheru->path;
1344         }
1345
1346         /* Set credentials */
1347         copy_peercred(sk, other);
1348
1349         sock->state     = SS_CONNECTED;
1350         sk->sk_state    = TCP_ESTABLISHED;
1351         sock_hold(newsk);
1352
1353         smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1354         unix_peer(sk)   = newsk;
1355
1356         unix_state_unlock(sk);
1357
1358         /* take ten and send info to listening sock */
1359         spin_lock(&other->sk_receive_queue.lock);
1360         __skb_queue_tail(&other->sk_receive_queue, skb);
1361         spin_unlock(&other->sk_receive_queue.lock);
1362         unix_state_unlock(other);
1363         other->sk_data_ready(other);
1364         sock_put(other);
1365         return 0;
1366
1367 out_unlock:
1368         if (other)
1369                 unix_state_unlock(other);
1370
             /*
              * Common failure exit: free the skb, release the pre-allocated
              * newsk and drop the lookup reference on other, whichever of
              * them exist at this point.
              */
1371 out:
1372         kfree_skb(skb);
1373         if (newsk)
1374                 unix_release_sock(newsk, 0);
1375         if (other)
1376                 sock_put(other);
1377         return err;
1378 }
1379
1380 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1381 {
1382         struct sock *ska = socka->sk, *skb = sockb->sk;
1383
1384         /* Join our sockets back to back */
1385         sock_hold(ska);
1386         sock_hold(skb);
1387         unix_peer(ska) = skb;
1388         unix_peer(skb) = ska;
1389         init_peercred(ska);
1390         init_peercred(skb);
1391
1392         if (ska->sk_type != SOCK_DGRAM) {
1393                 ska->sk_state = TCP_ESTABLISHED;
1394                 skb->sk_state = TCP_ESTABLISHED;
1395                 socka->state  = SS_CONNECTED;
1396                 sockb->state  = SS_CONNECTED;
1397         }
1398         return 0;
1399 }
1400
1401 static void unix_sock_inherit_flags(const struct socket *old,
1402                                     struct socket *new)
1403 {
1404         if (test_bit(SOCK_PASSCRED, &old->flags))
1405                 set_bit(SOCK_PASSCRED, &new->flags);
1406         if (test_bit(SOCK_PASSSEC, &old->flags))
1407                 set_bit(SOCK_PASSSEC, &new->flags);
1408 }
1409
1410 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1411                        bool kern)
1412 {
1413         struct sock *sk = sock->sk;
1414         struct sock *tsk;
1415         struct sk_buff *skb;
1416         int err;
1417
1418         err = -EOPNOTSUPP;
1419         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1420                 goto out;
1421
1422         err = -EINVAL;
1423         if (sk->sk_state != TCP_LISTEN)
1424                 goto out;
1425
1426         /* If socket state is TCP_LISTEN it cannot change (for now...),
1427          * so that no locks are necessary.
1428          */
1429
1430         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1431         if (!skb) {
1432                 /* This means receive shutdown. */
1433                 if (err == 0)
1434                         err = -EINVAL;
1435                 goto out;
1436         }
1437
1438         tsk = skb->sk;
1439         skb_free_datagram(sk, skb);
1440         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1441
1442         /* attach accepted sock to socket */
1443         unix_state_lock(tsk);
1444         newsock->state = SS_CONNECTED;
1445         unix_sock_inherit_flags(sock, newsock);
1446         sock_graft(tsk, newsock);
1447         unix_state_unlock(tsk);
1448         return 0;
1449
1450 out:
1451         return err;
1452 }
1453
1454
1455 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1456 {
1457         struct sock *sk = sock->sk;
1458         struct unix_sock *u;
1459         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1460         int err = 0;
1461
1462         if (peer) {
1463                 sk = unix_peer_get(sk);
1464
1465                 err = -ENOTCONN;
1466                 if (!sk)
1467                         goto out;
1468                 err = 0;
1469         } else {
1470                 sock_hold(sk);
1471         }
1472
1473         u = unix_sk(sk);
1474         unix_state_lock(sk);
1475         if (!u->addr) {
1476                 sunaddr->sun_family = AF_UNIX;
1477                 sunaddr->sun_path[0] = 0;
1478                 *uaddr_len = sizeof(short);
1479         } else {
1480                 struct unix_address *addr = u->addr;
1481
1482                 *uaddr_len = addr->len;
1483                 memcpy(sunaddr, addr->name, *uaddr_len);
1484         }
1485         unix_state_unlock(sk);
1486         sock_put(sk);
1487 out:
1488         return err;
1489 }
1490
1491 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1492 {
1493         int i;
1494
1495         scm->fp = UNIXCB(skb).fp;
1496         UNIXCB(skb).fp = NULL;
1497
1498         for (i = scm->fp->count-1; i >= 0; i--)
1499                 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1500 }
1501
1502 static void unix_destruct_scm(struct sk_buff *skb)
1503 {
1504         struct scm_cookie scm;
1505         memset(&scm, 0, sizeof(scm));
1506         scm.pid  = UNIXCB(skb).pid;
1507         if (UNIXCB(skb).fp)
1508                 unix_detach_fds(&scm, skb);
1509
1510         /* Alas, it calls VFS */
1511         /* So fscking what? fput() had been SMP-safe since the last Summer */
1512         scm_destroy(&scm);
1513         sock_wfree(skb);
1514 }
1515
1516 /*
1517  * The "user->unix_inflight" variable is protected by the garbage
1518  * collection lock, and we just read it locklessly here. If you go
1519  * over the limit, there might be a tiny race in actually noticing
1520  * it across threads. Tough.
1521  */
1522 static inline bool too_many_unix_fds(struct task_struct *p)
1523 {
1524         struct user_struct *user = current_user();
1525
1526         if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1527                 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1528         return false;
1529 }
1530
1531 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1532 {
1533         int i;
1534
1535         if (too_many_unix_fds(current))
1536                 return -ETOOMANYREFS;
1537
1538         /*
1539          * Need to duplicate file references for the sake of garbage
1540          * collection.  Otherwise a socket in the fps might become a
1541          * candidate for GC while the skb is not yet queued.
1542          */
1543         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1544         if (!UNIXCB(skb).fp)
1545                 return -ENOMEM;
1546
1547         for (i = scm->fp->count - 1; i >= 0; i--)
1548                 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1549         return 0;
1550 }
1551
1552 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1553 {
1554         int err = 0;
1555
1556         UNIXCB(skb).pid  = get_pid(scm->pid);
1557         UNIXCB(skb).uid = scm->creds.uid;
1558         UNIXCB(skb).gid = scm->creds.gid;
1559         UNIXCB(skb).fp = NULL;
1560         unix_get_secdata(scm, skb);
1561         if (scm->fp && send_fds)
1562                 err = unix_attach_fds(scm, skb);
1563
1564         skb->destructor = unix_destruct_scm;
1565         return err;
1566 }
1567
1568 static bool unix_passcred_enabled(const struct socket *sock,
1569                                   const struct sock *other)
1570 {
1571         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1572                !other->sk_socket ||
1573                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1574 }
1575
1576 /*
1577  * Some apps rely on write() giving SCM_CREDENTIALS
1578  * We include credentials if source or destination socket
1579  * asserted SOCK_PASSCRED.
1580  */
1581 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1582                             const struct sock *other)
1583 {
1584         if (UNIXCB(skb).pid)
1585                 return;
1586         if (unix_passcred_enabled(sock, other)) {
1587                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1588                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1589         }
1590 }
1591
1592 static int maybe_init_creds(struct scm_cookie *scm,
1593                             struct socket *socket,
1594                             const struct sock *other)
1595 {
1596         int err;
1597         struct msghdr msg = { .msg_controllen = 0 };
1598
1599         err = scm_send(socket, &msg, scm, false);
1600         if (err)
1601                 return err;
1602
1603         if (unix_passcred_enabled(socket, other)) {
1604                 scm->pid = get_pid(task_tgid(current));
1605                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1606         }
1607         return err;
1608 }
1609
1610 static bool unix_skb_scm_eq(struct sk_buff *skb,
1611                             struct scm_cookie *scm)
1612 {
1613         const struct unix_skb_parms *u = &UNIXCB(skb);
1614
1615         return u->pid == scm->pid &&
1616                uid_eq(u->uid, scm->creds.uid) &&
1617                gid_eq(u->gid, scm->creds.gid) &&
1618                unix_secdata_eq(scm, skb);
1619 }
1620
1621 /*
1622  *      Send AF_UNIX data.
1623  */
1624
1625 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1626                               size_t len)
1627 {
1628         struct sock *sk = sock->sk;
1629         struct net *net = sock_net(sk);
1630         struct unix_sock *u = unix_sk(sk);
1631         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1632         struct sock *other = NULL;
1633         int namelen = 0; /* fake GCC */
1634         int err;
1635         unsigned int hash;
1636         struct sk_buff *skb;
1637         long timeo;
1638         struct scm_cookie scm;
1639         int data_len = 0;
1640         int sk_locked;
1641
1642         wait_for_unix_gc();
1643         err = scm_send(sock, msg, &scm, false);
1644         if (err < 0)
1645                 return err;
1646
1647         err = -EOPNOTSUPP;
1648         if (msg->msg_flags&MSG_OOB)
1649                 goto out;
1650
1651         if (msg->msg_namelen) {
1652                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1653                 if (err < 0)
1654                         goto out;
1655                 namelen = err;
1656         } else {
1657                 sunaddr = NULL;
1658                 err = -ENOTCONN;
1659                 other = unix_peer_get(sk);
1660                 if (!other)
1661                         goto out;
1662         }
1663
1664         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1665             && (err = unix_autobind(sock)) != 0)
1666                 goto out;
1667
1668         err = -EMSGSIZE;
1669         if (len > sk->sk_sndbuf - 32)
1670                 goto out;
1671
1672         if (len > SKB_MAX_ALLOC) {
1673                 data_len = min_t(size_t,
1674                                  len - SKB_MAX_ALLOC,
1675                                  MAX_SKB_FRAGS * PAGE_SIZE);
1676                 data_len = PAGE_ALIGN(data_len);
1677
1678                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1679         }
1680
1681         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1682                                    msg->msg_flags & MSG_DONTWAIT, &err,
1683                                    PAGE_ALLOC_COSTLY_ORDER);
1684         if (skb == NULL)
1685                 goto out;
1686
1687         err = unix_scm_to_skb(&scm, skb, true);
1688         if (err < 0)
1689                 goto out_free;
1690
1691         skb_put(skb, len - data_len);
1692         skb->data_len = data_len;
1693         skb->len = len;
1694         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1695         if (err)
1696                 goto out_free;
1697
1698         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1699
1700 restart:
1701         if (!other) {
1702                 err = -ECONNRESET;
1703                 if (sunaddr == NULL)
1704                         goto out_free;
1705
1706                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1707                                         hash, &err);
1708                 if (other == NULL)
1709                         goto out_free;
1710         }
1711
1712         if (sk_filter(other, skb) < 0) {
1713                 /* Toss the packet but do not return any error to the sender */
1714                 err = len;
1715                 goto out_free;
1716         }
1717
1718         sk_locked = 0;
1719         unix_state_lock(other);
1720 restart_locked:
1721         err = -EPERM;
1722         if (!unix_may_send(sk, other))
1723                 goto out_unlock;
1724
1725         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1726                 /*
1727                  *      Check with 1003.1g - what should
1728                  *      datagram error
1729                  */
1730                 unix_state_unlock(other);
1731                 sock_put(other);
1732
1733                 if (!sk_locked)
1734                         unix_state_lock(sk);
1735
1736                 err = 0;
1737                 if (unix_peer(sk) == other) {
1738                         unix_peer(sk) = NULL;
1739                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1740
1741                         unix_state_unlock(sk);
1742
1743                         unix_dgram_disconnected(sk, other);
1744                         sock_put(other);
1745                         err = -ECONNREFUSED;
1746                 } else {
1747                         unix_state_unlock(sk);
1748                 }
1749
1750                 other = NULL;
1751                 if (err)
1752                         goto out_free;
1753                 goto restart;
1754         }
1755
1756         err = -EPIPE;
1757         if (other->sk_shutdown & RCV_SHUTDOWN)
1758                 goto out_unlock;
1759
1760         if (sk->sk_type != SOCK_SEQPACKET) {
1761                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1762                 if (err)
1763                         goto out_unlock;
1764         }
1765
1766         /* other == sk && unix_peer(other) != sk if
1767          * - unix_peer(sk) == NULL, destination address bound to sk
1768          * - unix_peer(sk) == sk by time of get but disconnected before lock
1769          */
1770         if (other != sk &&
1771             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1772                 if (timeo) {
1773                         timeo = unix_wait_for_peer(other, timeo);
1774
1775                         err = sock_intr_errno(timeo);
1776                         if (signal_pending(current))
1777                                 goto out_free;
1778
1779                         goto restart;
1780                 }
1781
1782                 if (!sk_locked) {
1783                         unix_state_unlock(other);
1784                         unix_state_double_lock(sk, other);
1785                 }
1786
1787                 if (unix_peer(sk) != other ||
1788                     unix_dgram_peer_wake_me(sk, other)) {
1789                         err = -EAGAIN;
1790                         sk_locked = 1;
1791                         goto out_unlock;
1792                 }
1793
1794                 if (!sk_locked) {
1795                         sk_locked = 1;
1796                         goto restart_locked;
1797                 }
1798         }
1799
1800         if (unlikely(sk_locked))
1801                 unix_state_unlock(sk);
1802
1803         if (sock_flag(other, SOCK_RCVTSTAMP))
1804                 __net_timestamp(skb);
1805         maybe_add_creds(skb, sock, other);
1806         skb_queue_tail(&other->sk_receive_queue, skb);
1807         unix_state_unlock(other);
1808         other->sk_data_ready(other);
1809         sock_put(other);
1810         scm_destroy(&scm);
1811         return len;
1812
1813 out_unlock:
1814         if (sk_locked)
1815                 unix_state_unlock(sk);
1816         unix_state_unlock(other);
1817 out_free:
1818         kfree_skb(skb);
1819 out:
1820         if (other)
1821                 sock_put(other);
1822         scm_destroy(&scm);
1823         return err;
1824 }
1825
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1830
/*
 * Send @len bytes from @msg to the peer of a connected stream socket.
 *
 * The payload is cut into skbs which are appended, one at a time, to the
 * peer's receive queue.  unix_scm_to_skb() is called for every skb, but
 * fd passing is only requested for the first one.
 *
 * Returns the number of bytes queued if at least one skb was queued,
 * otherwise a negative errno:
 *   -EOPNOTSUPP  MSG_OOB was requested, or a destination address was
 *                supplied on a socket that is not TCP_ESTABLISHED
 *   -EISCONN     a destination address was supplied on an established socket
 *   -ENOTCONN    the socket has no peer
 *   -EPIPE       this side has SEND_SHUTDOWN, or the peer is dead or has
 *                RCV_SHUTDOWN (SIGPIPE is raised too when nothing was sent
 *                and MSG_NOSIGNAL is not set)
 *   or the error reported by scm_send(), the skb allocation, or the copy
 *   from the iterator.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                               size_t len)
{
        struct sock *sk = sock->sk;
        struct sock *other = NULL;
        int err, size;
        struct sk_buff *skb;
        int sent = 0;
        struct scm_cookie scm;
        bool fds_sent = false;
        int data_len;

        wait_for_unix_gc();
        err = scm_send(sock, msg, &scm, false);
        if (err < 0)
                return err;

        err = -EOPNOTSUPP;
        if (msg->msg_flags&MSG_OOB)
                goto out_err;

        /* A stream socket only talks to its peer: an explicit address
         * is always an error, only the errno differs.
         */
        if (msg->msg_namelen) {
                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
                goto out_err;
        } else {
                err = -ENOTCONN;
                other = unix_peer(sk);
                if (!other)
                        goto out_err;
        }

        if (sk->sk_shutdown & SEND_SHUTDOWN)
                goto pipe_err;

        while (sent < len) {
                size = len - sent;

                /* Keep two messages in the pipe so it schedules better */
                size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

                /* allow fallback to order-0 allocations */
                size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

                /* Whatever exceeds SKB_MAX_HEAD(0) goes into the paged
                 * part: rounded up to whole pages, but never more than
                 * this skb's total size.
                 */
                data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

                data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

                skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
                                           msg->msg_flags & MSG_DONTWAIT, &err,
                                           get_order(UNIX_SKB_FRAGS_SZ));
                if (!skb)
                        goto out_err;

                /* Only send the fds in the first buffer */
                err = unix_scm_to_skb(&scm, skb, !fds_sent);
                if (err < 0) {
                        kfree_skb(skb);
                        goto out_err;
                }
                fds_sent = true;

                /* Linear part is size - data_len, the rest is paged. */
                skb_put(skb, size - data_len);
                skb->data_len = data_len;
                skb->len = size;
                err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
                if (err) {
                        kfree_skb(skb);
                        goto out_err;
                }

                unix_state_lock(other);

                /* Peer state is re-checked under its lock for every skb. */
                if (sock_flag(other, SOCK_DEAD) ||
                    (other->sk_shutdown & RCV_SHUTDOWN))
                        goto pipe_err_free;

                maybe_add_creds(skb, sock, other);
                skb_queue_tail(&other->sk_receive_queue, skb);
                unix_state_unlock(other);
                other->sk_data_ready(other);
                sent += size;
        }

        scm_destroy(&scm);

        return sent;

pipe_err_free:
        /* Reached with the peer's state lock held and an unqueued skb. */
        unix_state_unlock(other);
        kfree_skb(skb);
pipe_err:
        /* SIGPIPE only if not a single byte made it to the peer. */
        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
                send_sig(SIGPIPE, current, 0);
        err = -EPIPE;
out_err:
        scm_destroy(&scm);
        /* A partial send is reported as success with a short count. */
        return sent ? : err;
}
1929
/*
 * Append @size bytes of @page, starting at @offset, to the peer's
 * receive queue as a page fragment.
 *
 * The fragment is added to the skb at the tail of the peer's queue when
 * that skb passes unix_skb_scm_eq() against this sender's scm and
 * skb_append_pagefrags() accepts it; otherwise a fresh, empty skb is
 * allocated, filled and queued.
 *
 * Returns @size on success or a negative errno:
 *   -EOPNOTSUPP            MSG_OOB was requested
 *   -ENOTCONN              no peer, or the socket is not TCP_ESTABLISHED
 *   -EAGAIN/-ERESTARTSYS   taking the peer's iolock was interrupted
 *   -EPIPE                 this side has SEND_SHUTDOWN, or the peer is
 *                          dead or has RCV_SHUTDOWN (SIGPIPE is raised
 *                          unless MSG_NOSIGNAL is set)
 *   or the error from maybe_init_creds(), unix_scm_to_skb() or the skb
 *   allocation.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
                                    int offset, size_t size, int flags)
{
        int err;
        bool send_sigpipe = false;
        bool init_scm = true;
        struct scm_cookie scm;
        struct sock *other, *sk = socket->sk;
        struct sk_buff *skb, *newskb = NULL, *tail = NULL;

        if (flags & MSG_OOB)
                return -EOPNOTSUPP;

        other = unix_peer(sk);
        if (!other || sk->sk_state != TCP_ESTABLISHED)
                return -ENOTCONN;

        /* Never entered by falling through: this block is only reached
         * via "goto alloc_skb" below, with both the peer's state lock and
         * its iolock held.  Both are dropped before allocating, then
         * execution continues with the normal locking sequence.
         */
        if (false) {
alloc_skb:
                unix_state_unlock(other);
                mutex_unlock(&unix_sk(other)->iolock);
                newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
                                              &err, 0);
                if (!newskb)
                        goto err;
        }

        /* we must acquire iolock as we modify already present
         * skbs in the sk_receive_queue and mess with skb->len
         */
        err = mutex_lock_interruptible(&unix_sk(other)->iolock);
        if (err) {
                err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
                goto err;
        }

        if (sk->sk_shutdown & SEND_SHUTDOWN) {
                err = -EPIPE;
                send_sigpipe = true;
                goto err_unlock;
        }

        unix_state_lock(other);

        if (sock_flag(other, SOCK_DEAD) ||
            other->sk_shutdown & RCV_SHUTDOWN) {
                err = -EPIPE;
                send_sigpipe = true;
                goto err_state_unlock;
        }

        /* Done once, even if we loop through alloc_skb; init_scm also
         * tells the error path whether scm needs destroying.
         */
        if (init_scm) {
                err = maybe_init_creds(&scm, socket, other);
                if (err)
                        goto err_state_unlock;
                init_scm = false;
        }

        /* Pick the skb to append to:
         * - the tail is still the skb we gave up on before allocating:
         *   use the new skb;
         * - queue empty or the tail fails unix_skb_scm_eq(): use the new
         *   skb, allocating it first if we do not have one yet;
         * - otherwise append to the tail and drop an unneeded new skb.
         */
        skb = skb_peek_tail(&other->sk_receive_queue);
        if (tail && tail == skb) {
                skb = newskb;
        } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
                if (newskb) {
                        skb = newskb;
                } else {
                        tail = skb;
                        goto alloc_skb;
                }
        } else if (newskb) {
                /* this is fast path, we don't necessarily need to
                 * call to kfree_skb even though with newskb == NULL
                 * this - does no harm
                 */
                consume_skb(newskb);
                newskb = NULL;
        }

        /* Non-zero means the chosen skb cannot take the fragment:
         * remember it as the rejected tail and retry with a new skb.
         */
        if (skb_append_pagefrags(skb, page, offset, size)) {
                tail = skb;
                goto alloc_skb;
        }

        /* Account the appended bytes on the skb and against the sender. */
        skb->len += size;
        skb->data_len += size;
        skb->truesize += size;
        refcount_add(size, &sk->sk_wmem_alloc);

        /* A non-NULL newskb here is the skb we just filled (skb ==
         * newskb on every path that keeps it); it still has to be
         * given its scm data and put on the queue.
         */
        if (newskb) {
                err = unix_scm_to_skb(&scm, skb, false);
                if (err)
                        goto err_state_unlock;
                spin_lock(&other->sk_receive_queue.lock);
                __skb_queue_tail(&other->sk_receive_queue, newskb);
                spin_unlock(&other->sk_receive_queue.lock);
        }

        unix_state_unlock(other);
        mutex_unlock(&unix_sk(other)->iolock);

        other->sk_data_ready(other);
        scm_destroy(&scm);
        return size;

err_state_unlock:
        unix_state_unlock(other);
err_unlock:
        mutex_unlock(&unix_sk(other)->iolock);
err:
        kfree_skb(newskb);
        if (send_sigpipe && !(flags & MSG_NOSIGNAL))
                send_sig(SIGPIPE, current, 0);
        /* scm is only valid once maybe_init_creds() has succeeded. */
        if (!init_scm)
                scm_destroy(&scm);
        return err;
}
2045
2046 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2047                                   size_t len)
2048 {
2049         int err;
2050         struct sock *sk = sock->sk;
2051
2052         err = sock_error(sk);
2053         if (err)
2054                 return err;
2055
2056         if (sk->sk_state != TCP_ESTABLISHED)
2057                 return -ENOTCONN;
2058
2059         if (msg->msg_namelen)
2060                 msg->msg_namelen = 0;
2061
2062         return unix_dgram_sendmsg(sock, msg, len);
2063 }
2064
2065 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2066                                   size_t size, int flags)
2067 {
2068         struct sock *sk = sock->sk;
2069
2070         if (sk->sk_state != TCP_ESTABLISHED)
2071                 return -ENOTCONN;
2072
2073         return unix_dgram_recvmsg(sock, msg, size, flags);
2074 }
2075
2076 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2077 {
2078         struct unix_sock *u = unix_sk(sk);
2079
2080         if (u->addr) {
2081                 msg->msg_namelen = u->addr->len;
2082                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
2083         }
2084 }
2085
/*
 * Receive one datagram (also used for SOCK_SEQPACKET) into @msg.
 *
 * Returns the number of bytes copied, or the datagram's remaining length
 * (skb->len minus the peek offset) when MSG_TRUNC is set in @flags.
 * MSG_TRUNC is set in msg->msg_flags when the datagram did not fit in
 * @size.  Returns 0 for a SEQPACKET socket with RCV_SHUTDOWN that would
 * otherwise have reported -EAGAIN, and a negative errno on failure
 * (-EOPNOTSUPP for MSG_OOB).
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
                              size_t size, int flags)
{
        struct scm_cookie scm;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        struct sk_buff *skb, *last;
        long timeo;
        int err;
        int peeked, skip;

        err = -EOPNOTSUPP;
        if (flags&MSG_OOB)
                goto out;

        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

        /* Try to take a datagram with iolock held.  On success the loop
         * is left with iolock STILL HELD; otherwise iolock is dropped
         * and we wait for more packets only when the failure was -EAGAIN
         * and there is timeout left.
         */
        do {
                mutex_lock(&u->iolock);

                skip = sk_peek_offset(sk, flags);
                skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
                                              &err, &last);
                if (skb)
                        break;

                mutex_unlock(&u->iolock);

                if (err != -EAGAIN)
                        break;
        } while (timeo &&
                 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

        if (!skb) { /* implies iolock unlocked */
                unix_state_lock(sk);
                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
                    (sk->sk_shutdown & RCV_SHUTDOWN))
                        err = 0;
                unix_state_unlock(sk);
                goto out;
        }

        /* Wake anyone sleeping on this socket's peer_wait queue for
         * writability.
         */
        if (wq_has_sleeper(&u->peer_wait))
                wake_up_interruptible_sync_poll(&u->peer_wait,
                                                POLLOUT | POLLWRNORM |
                                                POLLWRBAND);

        /* Report the sender's address if the caller asked for one. */
        if (msg->msg_name)
                unix_copy_addr(msg, skb->sk);

        /* Clamp to what is left of the datagram after the peek offset,
         * flagging truncation when the caller's buffer is too small.
         */
        if (size > skb->len - skip)
                size = skb->len - skip;
        else if (size < skb->len - skip)
                msg->msg_flags |= MSG_TRUNC;

        err = skb_copy_datagram_msg(skb, skip, msg, size);
        if (err)
                goto out_free;

        if (sock_flag(sk, SOCK_RCVTSTAMP))
                __sock_recv_timestamp(msg, sk, skb);

        memset(&scm, 0, sizeof(scm));

        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
        unix_set_secdata(&scm, skb);

        if (!(flags & MSG_PEEK)) {
                /* Real read: the passed fds move from the skb to scm. */
                if (UNIXCB(skb).fp)
                        unix_detach_fds(&scm, skb);

                sk_peek_offset_bwd(sk, skb->len);
        } else {
                /* It is questionable: on PEEK we could:
                   - do not return fds - good, but too simple 8)
                   - return fds, and do not return them on read (old strategy,
                     apparently wrong)
                   - clone fds (I chose it for now, it is the most universal
                     solution)

                   POSIX 1003.1g does not actually define this clearly
                   at all. POSIX 1003.1g doesn't define a lot of things
                   clearly however!

                */

                sk_peek_offset_fwd(sk, size);

                if (UNIXCB(skb).fp)
                        scm.fp = scm_fp_dup(UNIXCB(skb).fp);
        }
        /* With MSG_TRUNC the caller gets the real remaining length. */
        err = (flags & MSG_TRUNC) ? skb->len - skip : size;

        scm_recv(sock, msg, &scm, flags);

out_free:
        /* Reached with iolock held and a datagram to release. */
        skb_free_datagram(sk, skb);
        mutex_unlock(&u->iolock);
out:
        return err;
}
2188
/*
 *      Sleep until more data has arrived. But check for races..
 *
 *      @last and @last_len describe the queue tail (and its length) as
 *      the caller last saw it.  The wait ends as soon as the tail skb
 *      or its length differs, the socket has an error, RCV_SHUTDOWN is
 *      set, a signal is pending, the timeout is used up, or the socket
 *      is found dead after a sleep.
 *
 *      @freezable selects freezable_schedule_timeout() over
 *      schedule_timeout().  Returns the remaining timeout.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
                                  struct sk_buff *last, unsigned int last_len,
                                  bool freezable)
{
        struct sk_buff *tail;
        DEFINE_WAIT(wait);

        unix_state_lock(sk);

        for (;;) {
                /* Register on the wait queue before testing the
                 * conditions, all under the state lock.
                 */
                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

                tail = skb_peek_tail(&sk->sk_receive_queue);
                if (tail != last ||
                    (tail && tail->len != last_len) ||
                    sk->sk_err ||
                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
                    signal_pending(current) ||
                    !timeo)
                        break;

                /* The state lock is dropped only around the sleep. */
                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
                unix_state_unlock(sk);
                if (freezable)
                        timeo = freezable_schedule_timeout(timeo);
                else
                        timeo = schedule_timeout(timeo);
                unix_state_lock(sk);

                /* On a dead socket we leave without clearing the
                 * SOCKWQ_ASYNC_WAITDATA bit.
                 */
                if (sock_flag(sk, SOCK_DEAD))
                        break;

                sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
        }

        finish_wait(sk_sleep(sk), &wait);
        unix_state_unlock(sk);
        return timeo;
}
2231
2232 static unsigned int unix_skb_len(const struct sk_buff *skb)
2233 {
2234         return skb->len - UNIXCB(skb).consumed;
2235 }
2236
/*
 * Parameters of one stream read, shared by the recvmsg and splice
 * front ends of unix_stream_read_generic().
 */
struct unix_stream_read_state {
        /* Moves data out of an skb: called with the skb, the number of
         * bytes to skip, the number of bytes wanted and this state.
         * Returns the number of bytes handled, or a negative value on
         * failure.
         */
        int (*recv_actor)(struct sk_buff *, int, int,
                          struct unix_stream_read_state *);
        struct socket *socket;          /* socket being read */
        struct msghdr *msg;             /* recvmsg destination; not set by splice */
        struct pipe_inode_info *pipe;   /* splice destination; not set by recvmsg */
        size_t size;                    /* maximum number of bytes to read */
        int flags;                      /* MSG_* flags */
        unsigned int splice_flags;      /* flags handed to skb_splice_bits() */
};
2247
/*
 * Common stream read loop behind recvmsg() and splice_read().
 *
 * Walks the receive queue, handing each skb (after skipping the current
 * peek offset) to state->recv_actor until state->size bytes were read,
 * the low-water target is met with an empty queue, or one of the stop
 * conditions below triggers.  Sleeps through unix_stream_data_wait()
 * when the queue is empty and there is timeout left; @freezable is
 * passed through to it.
 *
 * Returns the number of bytes read if non-zero, otherwise the error
 * code (which may be 0 at end of stream):
 *   -EINVAL      socket is not TCP_ESTABLISHED
 *   -EOPNOTSUPP  MSG_OOB was requested
 *   -ECONNRESET  socket is dead
 *   -EAGAIN      nothing queued and no timeout left
 *   -EFAULT      the actor failed before anything was read
 *   or a pending socket error / signal-related errno.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
                                    bool freezable)
{
        struct scm_cookie scm;
        struct socket *sock = state->socket;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        int copied = 0;
        int flags = state->flags;
        int noblock = flags & MSG_DONTWAIT;
        bool check_creds = false;
        int target;
        int err = 0;
        long timeo;
        int skip;
        size_t size = state->size;
        unsigned int last_len;

        if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
                err = -EINVAL;
                goto out;
        }

        if (unlikely(flags & MSG_OOB)) {
                err = -EOPNOTSUPP;
                goto out;
        }

        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
        timeo = sock_rcvtimeo(sk, noblock);

        memset(&scm, 0, sizeof(scm));

        /* Lock the socket to prevent queue disordering
         * while sleeps in memcpy_tomsg
         */
        mutex_lock(&u->iolock);

        skip = max(sk_peek_offset(sk, flags), 0);

        do {
                int chunk;
                bool drop_skb;
                struct sk_buff *skb, *last;

redo:
                /* (Re)start from the head of the queue, state lock held. */
                unix_state_lock(sk);
                if (sock_flag(sk, SOCK_DEAD)) {
                        err = -ECONNRESET;
                        goto unlock;
                }
                last = skb = skb_peek(&sk->sk_receive_queue);
                last_len = last ? last->len : 0;
again:
                /* Entered with the state lock held. */
                if (skb == NULL) {
                        if (copied >= target)
                                goto unlock;

                        /*
                         *      POSIX 1003.1g mandates this order.
                         */

                        err = sock_error(sk);
                        if (err)
                                goto unlock;
                        if (sk->sk_shutdown & RCV_SHUTDOWN)
                                goto unlock;

                        unix_state_unlock(sk);
                        if (!timeo) {
                                err = -EAGAIN;
                                break;
                        }

                        /* iolock is not held while sleeping. */
                        mutex_unlock(&u->iolock);

                        timeo = unix_stream_data_wait(sk, timeo, last,
                                                      last_len, freezable);

                        /* iolock is already released here, so the common
                         * exit path is bypassed and scm is destroyed by
                         * hand.
                         */
                        if (signal_pending(current)) {
                                err = sock_intr_errno(timeo);
                                scm_destroy(&scm);
                                goto out;
                        }

                        mutex_lock(&u->iolock);
                        goto redo;
unlock:
                        unix_state_unlock(sk);
                        break;
                }

                /* Step over skbs that lie entirely before the peek
                 * offset, remembering the last one seen for the wait.
                 */
                while (skip >= unix_skb_len(skb)) {
                        skip -= unix_skb_len(skb);
                        last = skb;
                        last_len = skb->len;
                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
                        if (!skb)
                                goto again;
                }

                unix_state_unlock(sk);

                if (check_creds) {
                        /* Never glue messages from different writers */
                        if (!unix_skb_scm_eq(skb, &scm))
                                break;
                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
                        /* Copy credentials */
                        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
                        unix_set_secdata(&scm, skb);
                        check_creds = true;
                }

                /* Copy address just once */
                /* NOTE(review): sunaddr looks local to this block, so
                 * clearing it would not stop the address being copied
                 * again on a later pass - confirm against
                 * DECLARE_SOCKADDR.
                 */
                if (state->msg && state->msg->msg_name) {
                        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
                                         state->msg->msg_name);
                        unix_copy_addr(state->msg, skb->sk);
                        sunaddr = NULL;
                }

                chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                /* Hold our own reference across the actor call; it is
                 * dropped again by consume_skb() just below.
                 */
                skb_get(skb);
                chunk = state->recv_actor(skb, skip, chunk, state);
                drop_skb = !unix_skb_len(skb);
                /* skb is only safe to use if !drop_skb */
                consume_skb(skb);
                if (chunk < 0) {
                        /* An actor failure only surfaces (as -EFAULT)
                         * when nothing has been read so far.
                         */
                        if (copied == 0)
                                copied = -EFAULT;
                        break;
                }
                copied += chunk;
                size -= chunk;

                if (drop_skb) {
                        /* the skb was touched by a concurrent reader;
                         * we should not expect anything from this skb
                         * anymore and assume it invalid - we can be
                         * sure it was dropped from the socket queue
                         *
                         * let's report a short read
                         */
                        err = 0;
                        break;
                }

                /* Mark read part of skb as used */
                if (!(flags & MSG_PEEK)) {
                        UNIXCB(skb).consumed += chunk;

                        sk_peek_offset_bwd(sk, chunk);

                        if (UNIXCB(skb).fp)
                                unix_detach_fds(&scm, skb);

                        /* skb not fully consumed: the request is done. */
                        if (unix_skb_len(skb))
                                break;

                        skb_unlink(skb, &sk->sk_receive_queue);
                        consume_skb(skb);

                        /* Stop once fds have been picked up. */
                        if (scm.fp)
                                break;
                } else {
                        /* It is questionable, see note in unix_dgram_recvmsg.
                         */
                        if (UNIXCB(skb).fp)
                                scm.fp = scm_fp_dup(UNIXCB(skb).fp);

                        sk_peek_offset_fwd(sk, chunk);

                        if (UNIXCB(skb).fp)
                                break;

                        /* Peeking leaves the skb queued, so move on to
                         * the next one ourselves (state lock retaken for
                         * the "again" path).
                         */
                        skip = 0;
                        last = skb;
                        last_len = skb->len;
                        unix_state_lock(sk);
                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
                        if (skb)
                                goto again;
                        unix_state_unlock(sk);
                        break;
                }
        } while (size);

        mutex_unlock(&u->iolock);
        /* Without a msghdr (splice) there is nowhere to deliver scm. */
        if (state->msg)
                scm_recv(sock, state->msg, &scm, flags);
        else
                scm_destroy(&scm);
out:
        return copied ? : err;
}
2444
2445 static int unix_stream_read_actor(struct sk_buff *skb,
2446                                   int skip, int chunk,
2447                                   struct unix_stream_read_state *state)
2448 {
2449         int ret;
2450
2451         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2452                                     state->msg, chunk);
2453         return ret ?: chunk;
2454 }
2455
2456 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2457                                size_t size, int flags)
2458 {
2459         struct unix_stream_read_state state = {
2460                 .recv_actor = unix_stream_read_actor,
2461                 .socket = sock,
2462                 .msg = msg,
2463                 .size = size,
2464                 .flags = flags
2465         };
2466
2467         return unix_stream_read_generic(&state, true);
2468 }
2469
2470 static int unix_stream_splice_actor(struct sk_buff *skb,
2471                                     int skip, int chunk,
2472                                     struct unix_stream_read_state *state)
2473 {
2474         return skb_splice_bits(skb, state->socket->sk,
2475                                UNIXCB(skb).consumed + skip,
2476                                state->pipe, chunk, state->splice_flags);
2477 }
2478
2479 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2480                                        struct pipe_inode_info *pipe,
2481                                        size_t size, unsigned int flags)
2482 {
2483         struct unix_stream_read_state state = {
2484                 .recv_actor = unix_stream_splice_actor,
2485                 .socket = sock,
2486                 .pipe = pipe,
2487                 .size = size,
2488                 .splice_flags = flags,
2489         };
2490
2491         if (unlikely(*ppos))
2492                 return -ESPIPE;
2493
2494         if (sock->file->f_flags & O_NONBLOCK ||
2495             flags & SPLICE_F_NONBLOCK)
2496                 state.flags = MSG_DONTWAIT;
2497
2498         return unix_stream_read_generic(&state, false);
2499 }
2500
2501 static int unix_shutdown(struct socket *sock, int mode)
2502 {
2503         struct sock *sk = sock->sk;
2504         struct sock *other;
2505
2506         if (mode < SHUT_RD || mode > SHUT_RDWR)
2507                 return -EINVAL;
2508         /* This maps:
2509          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2510          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2511          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2512          */
2513         ++mode;
2514
2515         unix_state_lock(sk);
2516         sk->sk_shutdown |= mode;
2517         other = unix_peer(sk);
2518         if (other)
2519                 sock_hold(other);
2520         unix_state_unlock(sk);
2521         sk->sk_state_change(sk);
2522
2523         if (other &&
2524                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2525
2526                 int peer_mode = 0;
2527
2528                 if (mode&RCV_SHUTDOWN)
2529                         peer_mode |= SEND_SHUTDOWN;
2530                 if (mode&SEND_SHUTDOWN)
2531                         peer_mode |= RCV_SHUTDOWN;
2532                 unix_state_lock(other);
2533                 other->sk_shutdown |= peer_mode;
2534                 unix_state_unlock(other);
2535                 other->sk_state_change(other);
2536                 if (peer_mode == SHUTDOWN_MASK)
2537                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2538                 else if (peer_mode & RCV_SHUTDOWN)
2539                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2540         }
2541         if (other)
2542                 sock_put(other);
2543
2544         return 0;
2545 }
2546
2547 long unix_inq_len(struct sock *sk)
2548 {
2549         struct sk_buff *skb;
2550         long amount = 0;
2551
2552         if (sk->sk_state == TCP_LISTEN)
2553                 return -EINVAL;
2554
2555         spin_lock(&sk->sk_receive_queue.lock);
2556         if (sk->sk_type == SOCK_STREAM ||
2557             sk->sk_type == SOCK_SEQPACKET) {
2558                 skb_queue_walk(&sk->sk_receive_queue, skb)
2559                         amount += unix_skb_len(skb);
2560         } else {
2561                 skb = skb_peek(&sk->sk_receive_queue);
2562                 if (skb)
2563                         amount = skb->len;
2564         }
2565         spin_unlock(&sk->sk_receive_queue.lock);
2566
2567         return amount;
2568 }
2569 EXPORT_SYMBOL_GPL(unix_inq_len);
2570
/* Value of sk_wmem_alloc_get() for @sk, as reported by SIOCOUTQ. */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2576
2577 static int unix_open_file(struct sock *sk)
2578 {
2579         struct path path;
2580         struct file *f;
2581         int fd;
2582
2583         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2584                 return -EPERM;
2585
2586         unix_state_lock(sk);
2587         path = unix_sk(sk)->path;
2588         if (!path.dentry) {
2589                 unix_state_unlock(sk);
2590                 return -ENOENT;
2591         }
2592
2593         path_get(&path);
2594         unix_state_unlock(sk);
2595
2596         fd = get_unused_fd_flags(O_CLOEXEC);
2597         if (fd < 0)
2598                 goto out;
2599
2600         f = dentry_open(&path, O_PATH, current_cred());
2601         if (IS_ERR(f)) {
2602                 put_unused_fd(fd);
2603                 fd = PTR_ERR(f);
2604                 goto out;
2605         }
2606
2607         fd_install(fd, f);
2608 out:
2609         path_put(&path);
2610
2611         return fd;
2612 }
2613
2614 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2615 {
2616         struct sock *sk = sock->sk;
2617         long amount = 0;
2618         int err;
2619
2620         switch (cmd) {
2621         case SIOCOUTQ:
2622                 amount = unix_outq_len(sk);
2623                 err = put_user(amount, (int __user *)arg);
2624                 break;
2625         case SIOCINQ:
2626                 amount = unix_inq_len(sk);
2627                 if (amount < 0)
2628                         err = amount;
2629                 else
2630                         err = put_user(amount, (int __user *)arg);
2631                 break;
2632         case SIOCUNIXFILE:
2633                 err = unix_open_file(sk);
2634                 break;
2635         default:
2636                 err = -ENOIOCTLCMD;
2637                 break;
2638         }
2639         return err;
2640 }
2641
2642 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2643 {
2644         struct sock *sk = sock->sk;
2645         unsigned int mask;
2646
2647         sock_poll_wait(file, sk_sleep(sk), wait);
2648         mask = 0;
2649
2650         /* exceptional events? */
2651         if (sk->sk_err)
2652                 mask |= POLLERR;
2653         if (sk->sk_shutdown == SHUTDOWN_MASK)
2654                 mask |= POLLHUP;
2655         if (sk->sk_shutdown & RCV_SHUTDOWN)
2656                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2657
2658         /* readable? */
2659         if (!skb_queue_empty(&sk->sk_receive_queue))
2660                 mask |= POLLIN | POLLRDNORM;
2661
2662         /* Connection-based need to check for termination and startup */
2663         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2664             sk->sk_state == TCP_CLOSE)
2665                 mask |= POLLHUP;
2666
2667         /*
2668          * we set writable also when the other side has shut down the
2669          * connection. This prevents stuck sockets.
2670          */
2671         if (unix_writable(sk))
2672                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2673
2674         return mask;
2675 }
2676
2677 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2678                                     poll_table *wait)
2679 {
2680         struct sock *sk = sock->sk, *other;
2681         unsigned int mask, writable;
2682
2683         sock_poll_wait(file, sk_sleep(sk), wait);
2684         mask = 0;
2685
2686         /* exceptional events? */
2687         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2688                 mask |= POLLERR |
2689                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2690
2691         if (sk->sk_shutdown & RCV_SHUTDOWN)
2692                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2693         if (sk->sk_shutdown == SHUTDOWN_MASK)
2694                 mask |= POLLHUP;
2695
2696         /* readable? */
2697         if (!skb_queue_empty(&sk->sk_receive_queue))
2698                 mask |= POLLIN | POLLRDNORM;
2699
2700         /* Connection-based need to check for termination and startup */
2701         if (sk->sk_type == SOCK_SEQPACKET) {
2702                 if (sk->sk_state == TCP_CLOSE)
2703                         mask |= POLLHUP;
2704                 /* connection hasn't started yet? */
2705                 if (sk->sk_state == TCP_SYN_SENT)
2706                         return mask;
2707         }
2708
2709         /* No write status requested, avoid expensive OUT tests. */
2710         if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2711                 return mask;
2712
2713         writable = unix_writable(sk);
2714         if (writable) {
2715                 unix_state_lock(sk);
2716
2717                 other = unix_peer(sk);
2718                 if (other && unix_peer(other) != sk &&
2719                     unix_recvq_full(other) &&
2720                     unix_dgram_peer_wake_me(sk, other))
2721                         writable = 0;
2722
2723                 unix_state_unlock(sk);
2724         }
2725
2726         if (writable)
2727                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2728         else
2729                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2730
2731         return mask;
2732 }
2733
2734 #ifdef CONFIG_PROC_FS
2735
/*
 * A seq_file position packs two values into one long: the low
 * BUCKET_SPACE bits hold an offset and the bits above them hold a
 * bucket index.
 */
#define BUCKET_SPACE (BITS_PER_LONG - UNIX_HASH_BITS - 2)

/* Extract the bucket index from packed position @pos. */
#define get_bucket(pos) ((pos) >> BUCKET_SPACE)
/* Extract the offset from packed position @pos. */
#define get_offset(pos) ((pos) & ((1L << BUCKET_SPACE) - 1))
/* Pack @bucket and @off into a single position value. */
#define set_bucket_offset(bucket, off) (((bucket) << BUCKET_SPACE) | (off))
2741
2742 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2743 {
2744         unsigned long offset = get_offset(*pos);
2745         unsigned long bucket = get_bucket(*pos);
2746         struct sock *sk;
2747         unsigned long count = 0;
2748
2749         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2750                 if (sock_net(sk) != seq_file_net(seq))
2751                         continue;
2752                 if (++count == offset)
2753                         break;
2754         }
2755
2756         return sk;
2757 }
2758
2759 static struct sock *unix_next_socket(struct seq_file *seq,
2760                                      struct sock *sk,
2761                                      loff_t *pos)
2762 {
2763         unsigned long bucket;
2764
2765         while (sk > (struct sock *)SEQ_START_TOKEN) {
2766                 sk = sk_next(sk);
2767                 if (!sk)
2768                         goto next_bucket;
2769                 if (sock_net(sk) == seq_file_net(seq))
2770                         return sk;
2771         }
2772
2773         do {
2774                 sk = unix_from_bucket(seq, pos);
2775                 if (sk)
2776                         return sk;
2777
2778 next_bucket:
2779                 bucket = get_bucket(*pos) + 1;
2780                 *pos = set_bucket_offset(bucket, 1);
2781         } while (bucket < ARRAY_SIZE(unix_socket_table));
2782
2783         return NULL;
2784 }
2785
2786 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2787         __acquires(unix_table_lock)
2788 {
2789         spin_lock(&unix_table_lock);
2790
2791         if (!*pos)
2792                 return SEQ_START_TOKEN;
2793
2794         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2795                 return NULL;
2796
2797         return unix_next_socket(seq, NULL, pos);
2798 }
2799
2800 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2801 {
2802         ++*pos;
2803         return unix_next_socket(seq, v, pos);
2804 }
2805
2806 static void unix_seq_stop(struct seq_file *seq, void *v)
2807         __releases(unix_table_lock)
2808 {
2809         spin_unlock(&unix_table_lock);
2810 }
2811
2812 static int unix_seq_show(struct seq_file *seq, void *v)
2813 {
2814
2815         if (v == SEQ_START_TOKEN)
2816                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2817                          "Inode Path\n");
2818         else {
2819                 struct sock *s = v;
2820                 struct unix_sock *u = unix_sk(s);
2821                 unix_state_lock(s);
2822
2823                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2824                         s,
2825                         refcount_read(&s->sk_refcnt),
2826                         0,
2827                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2828                         s->sk_type,
2829                         s->sk_socket ?
2830                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2831                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2832                         sock_i_ino(s));
2833
2834                 if (u->addr) {
2835                         int i, len;
2836                         seq_putc(seq, ' ');
2837
2838                         i = 0;
2839                         len = u->addr->len - sizeof(short);
2840                         if (!UNIX_ABSTRACT(s))
2841                                 len--;
2842                         else {
2843                                 seq_putc(seq, '@');
2844                                 i++;
2845                         }
2846                         for ( ; i < len; i++)
2847                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2848                                          '@');
2849                 }
2850                 unix_state_unlock(s);
2851                 seq_putc(seq, '\n');
2852         }
2853
2854         return 0;
2855 }
2856
2857 static const struct seq_operations unix_seq_ops = {
2858         .start  = unix_seq_start,
2859         .next   = unix_seq_next,
2860         .stop   = unix_seq_stop,
2861         .show   = unix_seq_show,
2862 };
2863
2864 static int unix_seq_open(struct inode *inode, struct file *file)
2865 {
2866         return seq_open_net(inode, file, &unix_seq_ops,
2867                             sizeof(struct seq_net_private));
2868 }
2869
2870 static const struct file_operations unix_seq_fops = {
2871         .owner          = THIS_MODULE,
2872         .open           = unix_seq_open,
2873         .read           = seq_read,
2874         .llseek         = seq_lseek,
2875         .release        = seq_release_net,
2876 };
2877
2878 #endif
2879
2880 static const struct net_proto_family unix_family_ops = {
2881         .family = PF_UNIX,
2882         .create = unix_create,
2883         .owner  = THIS_MODULE,
2884 };
2885
2886
2887 static int __net_init unix_net_init(struct net *net)
2888 {
2889         int error = -ENOMEM;
2890
2891         net->unx.sysctl_max_dgram_qlen = 10;
2892         if (unix_sysctl_register(net))
2893                 goto out;
2894
2895 #ifdef CONFIG_PROC_FS
2896         if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2897                 unix_sysctl_unregister(net);
2898                 goto out;
2899         }
2900 #endif
2901         error = 0;
2902 out:
2903         return error;
2904 }
2905
2906 static void __net_exit unix_net_exit(struct net *net)
2907 {
2908         unix_sysctl_unregister(net);
2909         remove_proc_entry("unix", net->proc_net);
2910 }
2911
2912 static struct pernet_operations unix_net_ops = {
2913         .init = unix_net_init,
2914         .exit = unix_net_exit,
2915 };
2916
2917 static int __init af_unix_init(void)
2918 {
2919         int rc = -1;
2920
2921         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2922
2923         rc = proto_register(&unix_proto, 1);
2924         if (rc != 0) {
2925                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2926                 goto out;
2927         }
2928
2929         sock_register(&unix_family_ops);
2930         register_pernet_subsys(&unix_net_ops);
2931 out:
2932         return rc;
2933 }
2934
2935 static void __exit af_unix_exit(void)
2936 {
2937         sock_unregister(PF_UNIX);
2938         proto_unregister(&unix_proto);
2939         unregister_pernet_subsys(&unix_net_ops);
2940 }
2941
2942 /* Earlier than device_initcall() so that other drivers invoking
2943    request_module() don't end up in a loop when modprobe tries
2944    to use a UNIX socket. But later than subsys_initcall() because
2945    we depend on stuff initialised there */
2946 fs_initcall(af_unix_init);
2947 module_exit(af_unix_exit);
2948
2949 MODULE_LICENSE("GPL");
2950 MODULE_ALIAS_NETPROTO(PF_UNIX);