--- /dev/null
+From 6e1ce3c3451291142a57c4f3f6f999a29fb5b3bc Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 1 Sep 2016 14:43:53 -0700
+Subject: af_unix: split 'u->readlock' into two: 'iolock' and 'bindlock'
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 6e1ce3c3451291142a57c4f3f6f999a29fb5b3bc upstream.
+
+Right now we use the 'readlock' both for protecting some of the af_unix
+IO path and for making the bind be single-threaded.
+
+The two are independent, but using the same lock makes for a nasty
+deadlock due to ordering with regard to filesystem locking. The bind
+locking would want to nest outside the VFS pathname locking, but the IO
+locking wants to nest inside some of those same locks.
+
+We tried to fix this earlier with commit c845acb324aa ("af_unix: Fix
+splice-bind deadlock") which moved the readlock inside the vfs locks,
+but that caused problems with overlayfs that will then call back into
+filesystem routines that take the lock in the wrong order anyway.
+
+Splitting the locks means that we can go back to having the bind lock be
+the outermost lock, and we don't have any deadlocks with lock ordering.
+
+Acked-by: Rainer Weikusat <rweikusat@cyberadapt.com>
+Acked-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/net/af_unix.h | 2 +-
+ net/unix/af_unix.c | 41 +++++++++++++++++++++--------------------
+ 2 files changed, 22 insertions(+), 21 deletions(-)
+
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -52,7 +52,7 @@ struct unix_sock {
+ struct sock sk;
+ struct unix_address *addr;
+ struct path path;
+- struct mutex readlock;
++ struct mutex iolock, bindlock;
+ struct sock *peer;
+ struct list_head link;
+ atomic_long_t inflight;
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock
+ {
+ struct unix_sock *u = unix_sk(sk);
+
+- if (mutex_lock_interruptible(&u->readlock))
++ if (mutex_lock_interruptible(&u->iolock))
+ return -EINTR;
+
+ sk->sk_peek_off = val;
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->iolock);
+
+ return 0;
+ }
+@@ -778,7 +778,8 @@ static struct sock *unix_create1(struct
+ spin_lock_init(&u->lock);
+ atomic_long_set(&u->inflight, 0);
+ INIT_LIST_HEAD(&u->link);
+- mutex_init(&u->readlock); /* single task reading lock */
++ mutex_init(&u->iolock); /* single task reading lock */
++ mutex_init(&u->bindlock); /* single task binding lock */
+ init_waitqueue_head(&u->peer_wait);
+ init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
+ unix_insert_socket(unix_sockets_unbound(sk), sk);
+@@ -847,7 +848,7 @@ static int unix_autobind(struct socket *
+ int err;
+ unsigned int retries = 0;
+
+- err = mutex_lock_interruptible(&u->readlock);
++ err = mutex_lock_interruptible(&u->bindlock);
+ if (err)
+ return err;
+
+@@ -894,7 +895,7 @@ retry:
+ spin_unlock(&unix_table_lock);
+ err = 0;
+
+-out: mutex_unlock(&u->readlock);
++out: mutex_unlock(&u->bindlock);
+ return err;
+ }
+
+@@ -1008,7 +1009,7 @@ static int unix_bind(struct socket *sock
+ goto out;
+ addr_len = err;
+
+- err = mutex_lock_interruptible(&u->readlock);
++ err = mutex_lock_interruptible(&u->bindlock);
+ if (err)
+ goto out;
+
+@@ -1062,7 +1063,7 @@ static int unix_bind(struct socket *sock
+ out_unlock:
+ spin_unlock(&unix_table_lock);
+ out_up:
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->bindlock);
+ out:
+ return err;
+ }
+@@ -1957,17 +1958,17 @@ static ssize_t unix_stream_sendpage(stru
+ if (false) {
+ alloc_skb:
+ unix_state_unlock(other);
+- mutex_unlock(&unix_sk(other)->readlock);
++ mutex_unlock(&unix_sk(other)->iolock);
+ newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
+ &err, 0);
+ if (!newskb)
+ goto err;
+ }
+
+- /* we must acquire readlock as we modify already present
++ /* we must acquire iolock as we modify already present
+ * skbs in the sk_receive_queue and mess with skb->len
+ */
+- err = mutex_lock_interruptible(&unix_sk(other)->readlock);
++ err = mutex_lock_interruptible(&unix_sk(other)->iolock);
+ if (err) {
+ err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
+ goto err;
+@@ -2034,7 +2035,7 @@ alloc_skb:
+ }
+
+ unix_state_unlock(other);
+- mutex_unlock(&unix_sk(other)->readlock);
++ mutex_unlock(&unix_sk(other)->iolock);
+
+ other->sk_data_ready(other);
+ scm_destroy(&scm);
+@@ -2043,7 +2044,7 @@ alloc_skb:
+ err_state_unlock:
+ unix_state_unlock(other);
+ err_unlock:
+- mutex_unlock(&unix_sk(other)->readlock);
++ mutex_unlock(&unix_sk(other)->iolock);
+ err:
+ kfree_skb(newskb);
+ if (send_sigpipe && !(flags & MSG_NOSIGNAL))
+@@ -2108,7 +2109,7 @@ static int unix_dgram_recvmsg(struct soc
+ if (flags&MSG_OOB)
+ goto out;
+
+- err = mutex_lock_interruptible(&u->readlock);
++ err = mutex_lock_interruptible(&u->iolock);
+ if (unlikely(err)) {
+ /* recvmsg() in non blocking mode is supposed to return -EAGAIN
+ * sk_rcvtimeo is not honored by mutex_lock_interruptible()
+@@ -2184,7 +2185,7 @@ static int unix_dgram_recvmsg(struct soc
+ out_free:
+ skb_free_datagram(sk, skb);
+ out_unlock:
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->iolock);
+ out:
+ return err;
+ }
+@@ -2279,7 +2280,7 @@ static int unix_stream_read_generic(stru
+ /* Lock the socket to prevent queue disordering
+ * while sleeps in memcpy_tomsg
+ */
+- mutex_lock(&u->readlock);
++ mutex_lock(&u->iolock);
+
+ if (flags & MSG_PEEK)
+ skip = sk_peek_offset(sk, flags);
+@@ -2320,7 +2321,7 @@ again:
+ break;
+ }
+
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->iolock);
+
+ timeo = unix_stream_data_wait(sk, timeo, last,
+ last_len);
+@@ -2331,7 +2332,7 @@ again:
+ goto out;
+ }
+
+- mutex_lock(&u->readlock);
++ mutex_lock(&u->iolock);
+ continue;
+ unlock:
+ unix_state_unlock(sk);
+@@ -2434,7 +2435,7 @@ unlock:
+ }
+ } while (size);
+
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->iolock);
+ if (state->msg)
+ scm_recv(sock, state->msg, &scm, flags);
+ else
+@@ -2475,9 +2476,9 @@ static ssize_t skb_unix_socket_splice(st
+ int ret;
+ struct unix_sock *u = unix_sk(sk);
+
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->iolock);
+ ret = splice_to_pipe(pipe, spd);
+- mutex_lock(&u->readlock);
++ mutex_lock(&u->iolock);
+
+ return ret;
+ }
--- /dev/null
+From 38f7bd94a97b542de86a2be9229289717e33a7a4 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 1 Sep 2016 14:56:49 -0700
+Subject: Revert "af_unix: Fix splice-bind deadlock"
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 38f7bd94a97b542de86a2be9229289717e33a7a4 upstream.
+
+This reverts commit c845acb324aa85a39650a14e7696982ceea75dc1.
+
+It turns out that it just replaces one deadlock with another one: we can
+still get the wrong lock ordering with the readlock due to overlayfs
+calling back into the filesystem layer and still taking the vfs locks
+after the readlock.
+
+The proper solution ends up being to just split the readlock into two
+pieces: the bind lock (taken *outside* the vfs locks) and the IO lock
+(taken *inside* the filesystem locks). The two locks are independent
+anyway.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/unix/af_unix.c | 68 +++++++++++++++++++++--------------------------------
+ 1 file changed, 27 insertions(+), 41 deletions(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -953,20 +953,32 @@ fail:
+ return NULL;
+ }
+
+-static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
+- struct path *res)
++static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+ {
+- int err;
+-
+- err = security_path_mknod(path, dentry, mode, 0);
++ struct dentry *dentry;
++ struct path path;
++ int err = 0;
++ /*
++ * Get the parent directory, calculate the hash for last
++ * component.
++ */
++ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
++ err = PTR_ERR(dentry);
++ if (IS_ERR(dentry))
++ return err;
++
++ /*
++ * All right, let's create it.
++ */
++ err = security_path_mknod(&path, dentry, mode, 0);
+ if (!err) {
+- err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
++ err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+ if (!err) {
+- res->mnt = mntget(path->mnt);
++ res->mnt = mntget(path.mnt);
+ res->dentry = dget(dentry);
+ }
+ }
+-
++ done_path_create(&path, dentry);
+ return err;
+ }
+
+@@ -977,12 +989,10 @@ static int unix_bind(struct socket *sock
+ struct unix_sock *u = unix_sk(sk);
+ struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
+ char *sun_path = sunaddr->sun_path;
+- int err, name_err;
++ int err;
+ unsigned int hash;
+ struct unix_address *addr;
+ struct hlist_head *list;
+- struct path path;
+- struct dentry *dentry;
+
+ err = -EINVAL;
+ if (sunaddr->sun_family != AF_UNIX)
+@@ -998,34 +1008,14 @@ static int unix_bind(struct socket *sock
+ goto out;
+ addr_len = err;
+
+- name_err = 0;
+- dentry = NULL;
+- if (sun_path[0]) {
+- /* Get the parent directory, calculate the hash for last
+- * component.
+- */
+- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+-
+- if (IS_ERR(dentry)) {
+- /* delay report until after 'already bound' check */
+- name_err = PTR_ERR(dentry);
+- dentry = NULL;
+- }
+- }
+-
+ err = mutex_lock_interruptible(&u->readlock);
+ if (err)
+- goto out_path;
++ goto out;
+
+ err = -EINVAL;
+ if (u->addr)
+ goto out_up;
+
+- if (name_err) {
+- err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+- goto out_up;
+- }
+-
+ err = -ENOMEM;
+ addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
+ if (!addr)
+@@ -1036,11 +1026,11 @@ static int unix_bind(struct socket *sock
+ addr->hash = hash ^ sk->sk_type;
+ atomic_set(&addr->refcnt, 1);
+
+- if (dentry) {
+- struct path u_path;
++ if (sun_path[0]) {
++ struct path path;
+ umode_t mode = S_IFSOCK |
+ (SOCK_INODE(sock)->i_mode & ~current_umask());
+- err = unix_mknod(dentry, &path, mode, &u_path);
++ err = unix_mknod(sun_path, mode, &path);
+ if (err) {
+ if (err == -EEXIST)
+ err = -EADDRINUSE;
+@@ -1048,9 +1038,9 @@ static int unix_bind(struct socket *sock
+ goto out_up;
+ }
+ addr->hash = UNIX_HASH_SIZE;
+- hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
++ hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+ spin_lock(&unix_table_lock);
+- u->path = u_path;
++ u->path = path;
+ list = &unix_socket_table[hash];
+ } else {
+ spin_lock(&unix_table_lock);
+@@ -1073,10 +1063,6 @@ out_unlock:
+ spin_unlock(&unix_table_lock);
+ out_up:
+ mutex_unlock(&u->readlock);
+-out_path:
+- if (dentry)
+- done_path_create(&path, dentry);
+-
+ out:
+ return err;
+ }