git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 26 Sep 2016 10:25:00 +0000 (12:25 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 26 Sep 2016 10:25:00 +0000 (12:25 +0200)
added patches:
af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch
revert-af_unix-fix-splice-bind-deadlock.patch

queue-4.4/af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch [new file with mode: 0644]
queue-4.4/revert-af_unix-fix-splice-bind-deadlock.patch [new file with mode: 0644]
queue-4.4/series

diff --git a/queue-4.4/af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch b/queue-4.4/af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch
new file mode 100644 (file)
index 0000000..a5eecbd
--- /dev/null
@@ -0,0 +1,215 @@
+From 6e1ce3c3451291142a57c4f3f6f999a29fb5b3bc Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 1 Sep 2016 14:43:53 -0700
+Subject: af_unix: split 'u->readlock' into two: 'iolock' and 'bindlock'
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 6e1ce3c3451291142a57c4f3f6f999a29fb5b3bc upstream.
+
+Right now we use the 'readlock' both for protecting some of the af_unix
+IO path and for making the bind be single-threaded.
+
+The two are independent, but using the same lock makes for a nasty
+deadlock due to ordering with regards to filesystem locking.  The bind
+locking would want to nest outside the VFS pathname locking, but the IO
+locking wants to nest inside some of those same locks.
+
+We tried to fix this earlier with commit c845acb324aa ("af_unix: Fix
+splice-bind deadlock") which moved the readlock inside the vfs locks,
+but that caused problems with overlayfs that will then call back into
+filesystem routines that take the lock in the wrong order anyway.
+
+Splitting the locks means that we can go back to having the bind lock be
+the outermost lock, and we don't have any deadlocks with lock ordering.
+
+Acked-by: Rainer Weikusat <rweikusat@cyberadapt.com>
+Acked-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/net/af_unix.h |    2 +-
+ net/unix/af_unix.c    |   41 +++++++++++++++++++++--------------------
+ 2 files changed, 22 insertions(+), 21 deletions(-)
+
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -52,7 +52,7 @@ struct unix_sock {
+       struct sock             sk;
+       struct unix_address     *addr;
+       struct path             path;
+-      struct mutex            readlock;
++      struct mutex            iolock, bindlock;
+       struct sock             *peer;
+       struct list_head        link;
+       atomic_long_t           inflight;
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock
+ {
+       struct unix_sock *u = unix_sk(sk);
+-      if (mutex_lock_interruptible(&u->readlock))
++      if (mutex_lock_interruptible(&u->iolock))
+               return -EINTR;
+       sk->sk_peek_off = val;
+-      mutex_unlock(&u->readlock);
++      mutex_unlock(&u->iolock);
+       return 0;
+ }
+@@ -778,7 +778,8 @@ static struct sock *unix_create1(struct
+       spin_lock_init(&u->lock);
+       atomic_long_set(&u->inflight, 0);
+       INIT_LIST_HEAD(&u->link);
+-      mutex_init(&u->readlock); /* single task reading lock */
++      mutex_init(&u->iolock); /* single task reading lock */
++      mutex_init(&u->bindlock); /* single task binding lock */
+       init_waitqueue_head(&u->peer_wait);
+       init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
+       unix_insert_socket(unix_sockets_unbound(sk), sk);
+@@ -847,7 +848,7 @@ static int unix_autobind(struct socket *
+       int err;
+       unsigned int retries = 0;
+-      err = mutex_lock_interruptible(&u->readlock);
++      err = mutex_lock_interruptible(&u->bindlock);
+       if (err)
+               return err;
+@@ -894,7 +895,7 @@ retry:
+       spin_unlock(&unix_table_lock);
+       err = 0;
+-out:  mutex_unlock(&u->readlock);
++out:  mutex_unlock(&u->bindlock);
+       return err;
+ }
+@@ -1008,7 +1009,7 @@ static int unix_bind(struct socket *sock
+               goto out;
+       addr_len = err;
+-      err = mutex_lock_interruptible(&u->readlock);
++      err = mutex_lock_interruptible(&u->bindlock);
+       if (err)
+               goto out;
+@@ -1062,7 +1063,7 @@ static int unix_bind(struct socket *sock
+ out_unlock:
+       spin_unlock(&unix_table_lock);
+ out_up:
+-      mutex_unlock(&u->readlock);
++      mutex_unlock(&u->bindlock);
+ out:
+       return err;
+ }
+@@ -1957,17 +1958,17 @@ static ssize_t unix_stream_sendpage(stru
+       if (false) {
+ alloc_skb:
+               unix_state_unlock(other);
+-              mutex_unlock(&unix_sk(other)->readlock);
++              mutex_unlock(&unix_sk(other)->iolock);
+               newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
+                                             &err, 0);
+               if (!newskb)
+                       goto err;
+       }
+-      /* we must acquire readlock as we modify already present
++      /* we must acquire iolock as we modify already present
+        * skbs in the sk_receive_queue and mess with skb->len
+        */
+-      err = mutex_lock_interruptible(&unix_sk(other)->readlock);
++      err = mutex_lock_interruptible(&unix_sk(other)->iolock);
+       if (err) {
+               err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
+               goto err;
+@@ -2034,7 +2035,7 @@ alloc_skb:
+       }
+       unix_state_unlock(other);
+-      mutex_unlock(&unix_sk(other)->readlock);
++      mutex_unlock(&unix_sk(other)->iolock);
+       other->sk_data_ready(other);
+       scm_destroy(&scm);
+@@ -2043,7 +2044,7 @@ alloc_skb:
+ err_state_unlock:
+       unix_state_unlock(other);
+ err_unlock:
+-      mutex_unlock(&unix_sk(other)->readlock);
++      mutex_unlock(&unix_sk(other)->iolock);
+ err:
+       kfree_skb(newskb);
+       if (send_sigpipe && !(flags & MSG_NOSIGNAL))
+@@ -2108,7 +2109,7 @@ static int unix_dgram_recvmsg(struct soc
+       if (flags&MSG_OOB)
+               goto out;
+-      err = mutex_lock_interruptible(&u->readlock);
++      err = mutex_lock_interruptible(&u->iolock);
+       if (unlikely(err)) {
+               /* recvmsg() in non blocking mode is supposed to return -EAGAIN
+                * sk_rcvtimeo is not honored by mutex_lock_interruptible()
+@@ -2184,7 +2185,7 @@ static int unix_dgram_recvmsg(struct soc
+ out_free:
+       skb_free_datagram(sk, skb);
+ out_unlock:
+-      mutex_unlock(&u->readlock);
++      mutex_unlock(&u->iolock);
+ out:
+       return err;
+ }
+@@ -2279,7 +2280,7 @@ static int unix_stream_read_generic(stru
+       /* Lock the socket to prevent queue disordering
+        * while sleeps in memcpy_tomsg
+        */
+-      mutex_lock(&u->readlock);
++      mutex_lock(&u->iolock);
+       if (flags & MSG_PEEK)
+               skip = sk_peek_offset(sk, flags);
+@@ -2320,7 +2321,7 @@ again:
+                               break;
+                       }
+-                      mutex_unlock(&u->readlock);
++                      mutex_unlock(&u->iolock);
+                       timeo = unix_stream_data_wait(sk, timeo, last,
+                                                     last_len);
+@@ -2331,7 +2332,7 @@ again:
+                               goto out;
+                       }
+-                      mutex_lock(&u->readlock);
++                      mutex_lock(&u->iolock);
+                       continue;
+ unlock:
+                       unix_state_unlock(sk);
+@@ -2434,7 +2435,7 @@ unlock:
+               }
+       } while (size);
+-      mutex_unlock(&u->readlock);
++      mutex_unlock(&u->iolock);
+       if (state->msg)
+               scm_recv(sock, state->msg, &scm, flags);
+       else
+@@ -2475,9 +2476,9 @@ static ssize_t skb_unix_socket_splice(st
+       int ret;
+       struct unix_sock *u = unix_sk(sk);
+-      mutex_unlock(&u->readlock);
++      mutex_unlock(&u->iolock);
+       ret = splice_to_pipe(pipe, spd);
+-      mutex_lock(&u->readlock);
++      mutex_lock(&u->iolock);
+       return ret;
+ }
diff --git a/queue-4.4/revert-af_unix-fix-splice-bind-deadlock.patch b/queue-4.4/revert-af_unix-fix-splice-bind-deadlock.patch
new file mode 100644 (file)
index 0000000..e5b8dfd
--- /dev/null
@@ -0,0 +1,161 @@
+From 38f7bd94a97b542de86a2be9229289717e33a7a4 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 1 Sep 2016 14:56:49 -0700
+Subject: Revert "af_unix: Fix splice-bind deadlock"
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 38f7bd94a97b542de86a2be9229289717e33a7a4 upstream.
+
+This reverts commit c845acb324aa85a39650a14e7696982ceea75dc1.
+
+It turns out that it just replaces one deadlock with another one: we can
+still get the wrong lock ordering with the readlock due to overlayfs
+calling back into the filesystem layer and still taking the vfs locks
+after the readlock.
+
+The proper solution ends up being to just split the readlock into two
+pieces: the bind lock (taken *outside* the vfs locks) and the IO lock
+(taken *inside* the filesystem locks).  The two locks are independent
+anyway.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/unix/af_unix.c |   68 +++++++++++++++++++++--------------------------------
+ 1 file changed, 27 insertions(+), 41 deletions(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -953,20 +953,32 @@ fail:
+       return NULL;
+ }
+-static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
+-                    struct path *res)
++static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+ {
+-      int err;
+-
+-      err = security_path_mknod(path, dentry, mode, 0);
++      struct dentry *dentry;
++      struct path path;
++      int err = 0;
++      /*
++       * Get the parent directory, calculate the hash for last
++       * component.
++       */
++      dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
++      err = PTR_ERR(dentry);
++      if (IS_ERR(dentry))
++              return err;
++
++      /*
++       * All right, let's create it.
++       */
++      err = security_path_mknod(&path, dentry, mode, 0);
+       if (!err) {
+-              err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
++              err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+               if (!err) {
+-                      res->mnt = mntget(path->mnt);
++                      res->mnt = mntget(path.mnt);
+                       res->dentry = dget(dentry);
+               }
+       }
+-
++      done_path_create(&path, dentry);
+       return err;
+ }
+@@ -977,12 +989,10 @@ static int unix_bind(struct socket *sock
+       struct unix_sock *u = unix_sk(sk);
+       struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
+       char *sun_path = sunaddr->sun_path;
+-      int err, name_err;
++      int err;
+       unsigned int hash;
+       struct unix_address *addr;
+       struct hlist_head *list;
+-      struct path path;
+-      struct dentry *dentry;
+       err = -EINVAL;
+       if (sunaddr->sun_family != AF_UNIX)
+@@ -998,34 +1008,14 @@ static int unix_bind(struct socket *sock
+               goto out;
+       addr_len = err;
+-      name_err = 0;
+-      dentry = NULL;
+-      if (sun_path[0]) {
+-              /* Get the parent directory, calculate the hash for last
+-               * component.
+-               */
+-              dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+-
+-              if (IS_ERR(dentry)) {
+-                      /* delay report until after 'already bound' check */
+-                      name_err = PTR_ERR(dentry);
+-                      dentry = NULL;
+-              }
+-      }
+-
+       err = mutex_lock_interruptible(&u->readlock);
+       if (err)
+-              goto out_path;
++              goto out;
+       err = -EINVAL;
+       if (u->addr)
+               goto out_up;
+-      if (name_err) {
+-              err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+-              goto out_up;
+-      }
+-
+       err = -ENOMEM;
+       addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
+       if (!addr)
+@@ -1036,11 +1026,11 @@ static int unix_bind(struct socket *sock
+       addr->hash = hash ^ sk->sk_type;
+       atomic_set(&addr->refcnt, 1);
+-      if (dentry) {
+-              struct path u_path;
++      if (sun_path[0]) {
++              struct path path;
+               umode_t mode = S_IFSOCK |
+                      (SOCK_INODE(sock)->i_mode & ~current_umask());
+-              err = unix_mknod(dentry, &path, mode, &u_path);
++              err = unix_mknod(sun_path, mode, &path);
+               if (err) {
+                       if (err == -EEXIST)
+                               err = -EADDRINUSE;
+@@ -1048,9 +1038,9 @@ static int unix_bind(struct socket *sock
+                       goto out_up;
+               }
+               addr->hash = UNIX_HASH_SIZE;
+-              hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
++              hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+               spin_lock(&unix_table_lock);
+-              u->path = u_path;
++              u->path = path;
+               list = &unix_socket_table[hash];
+       } else {
+               spin_lock(&unix_table_lock);
+@@ -1073,10 +1063,6 @@ out_unlock:
+       spin_unlock(&unix_table_lock);
+ out_up:
+       mutex_unlock(&u->readlock);
+-out_path:
+-      if (dentry)
+-              done_path_create(&path, dentry);
+-
+ out:
+       return err;
+ }
index a84c9f07509fd5742dd68f3affcb72bf0960121b..265fb034a8ff6e94cc3feb6df9ae58cd38658568 100644 (file)
@@ -30,3 +30,5 @@ staging-iio-adc-fix-indent-on-break-statement.patch
 nouveau-fix-nv40_perfctr_next-cleanup-regression.patch
 megaraid-fix-null-pointer-check-in-megasas_detach_one.patch
 bonding-fix-bonding-crash.patch
+revert-af_unix-fix-splice-bind-deadlock.patch
+af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch