]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.4
authorSasha Levin <sashal@kernel.org>
Thu, 7 Jan 2021 13:41:42 +0000 (08:41 -0500)
committerSasha Levin <sashal@kernel.org>
Thu, 7 Jan 2021 13:41:42 +0000 (08:41 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.4/exec-transform-exec_update_mutex-into-a-rw_semaphore.patch [new file with mode: 0644]
queue-5.4/fuse-fix-bad-inode.patch [new file with mode: 0644]
queue-5.4/perf-break-deadlock-involving-exec_update_mutex.patch [new file with mode: 0644]
queue-5.4/rwsem-implement-down_read_interruptible.patch [new file with mode: 0644]
queue-5.4/rwsem-implement-down_read_killable_nested.patch [new file with mode: 0644]
queue-5.4/series

diff --git a/queue-5.4/exec-transform-exec_update_mutex-into-a-rw_semaphore.patch b/queue-5.4/exec-transform-exec_update_mutex-into-a-rw_semaphore.patch
new file mode 100644 (file)
index 0000000..63108b7
--- /dev/null
@@ -0,0 +1,366 @@
+From c85ff311ef827fc7ece533b1ebb111ef4fd42def Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Dec 2020 14:12:00 -0600
+Subject: exec: Transform exec_update_mutex into a rw_semaphore
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+[ Upstream commit f7cfd871ae0c5008d94b6f66834e7845caa93c15 ]
+
+Recently syzbot reported[0] that there is a deadlock amongst the users
+of exec_update_mutex.  The problematic lock ordering found by lockdep
+was:
+
+   perf_event_open  (exec_update_mutex -> ovl_i_mutex)
+   chown            (ovl_i_mutex       -> sb_writes)
+   sendfile         (sb_writes         -> p->lock)
+     by reading from a proc file and writing to overlayfs
+   proc_pid_syscall (p->lock           -> exec_update_mutex)
+
+While looking at possible solutions it occurred to me that all of the
+users and possible users involved only wanted the state of the given
+process to remain the same.  They are all readers.  The only writer is
+exec.
+
+There is no reason for readers to block on each other.  So fix
+this deadlock by transforming exec_update_mutex into a rw_semaphore
+named exec_update_lock that only exec takes for writing.
+
+Cc: Jann Horn <jannh@google.com>
+Cc: Vasiliy Kulikov <segoon@openwall.com>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Bernd Edlinger <bernd.edlinger@hotmail.de>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Christopher Yeoh <cyeoh@au1.ibm.com>
+Cc: Cyrill Gorcunov <gorcunov@gmail.com>
+Cc: Sargun Dhillon <sargun@sargun.me>
+Cc: Christian Brauner <christian.brauner@ubuntu.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
+Fixes: eea9673250db ("exec: Add exec_update_mutex to replace cred_guard_mutex")
+[0] https://lkml.kernel.org/r/00000000000063640c05ade8e3de@google.com
+Reported-by: syzbot+db9cdf3dd1f64252c6ef@syzkaller.appspotmail.com
+Link: https://lkml.kernel.org/r/87ft4mbqen.fsf@x220.int.ebiederm.org
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/exec.c                    | 12 ++++++------
+ fs/proc/base.c               | 10 +++++-----
+ include/linux/sched/signal.h | 11 ++++++-----
+ init/init_task.c             |  2 +-
+ kernel/events/core.c         | 12 ++++++------
+ kernel/fork.c                |  6 +++---
+ kernel/kcmp.c                | 30 +++++++++++++++---------------
+ kernel/locking/rwsem.c       |  4 ++--
+ 8 files changed, 44 insertions(+), 43 deletions(-)
+
+diff --git a/fs/exec.c b/fs/exec.c
+index 2441eb1a1e2d0..1b4d2206d53a1 100644
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1009,8 +1009,8 @@ EXPORT_SYMBOL(read_code);
+ /*
+  * Maps the mm_struct mm into the current task struct.
+- * On success, this function returns with the mutex
+- * exec_update_mutex locked.
++ * On success, this function returns with exec_update_lock
++ * held for writing.
+  */
+ static int exec_mmap(struct mm_struct *mm)
+ {
+@@ -1023,7 +1023,7 @@ static int exec_mmap(struct mm_struct *mm)
+       old_mm = current->mm;
+       exec_mm_release(tsk, old_mm);
+-      ret = mutex_lock_killable(&tsk->signal->exec_update_mutex);
++      ret = down_write_killable(&tsk->signal->exec_update_lock);
+       if (ret)
+               return ret;
+@@ -1038,7 +1038,7 @@ static int exec_mmap(struct mm_struct *mm)
+               down_read(&old_mm->mmap_sem);
+               if (unlikely(old_mm->core_state)) {
+                       up_read(&old_mm->mmap_sem);
+-                      mutex_unlock(&tsk->signal->exec_update_mutex);
++                      up_write(&tsk->signal->exec_update_lock);
+                       return -EINTR;
+               }
+       }
+@@ -1450,7 +1450,7 @@ static void free_bprm(struct linux_binprm *bprm)
+       free_arg_pages(bprm);
+       if (bprm->cred) {
+               if (bprm->called_exec_mmap)
+-                      mutex_unlock(&current->signal->exec_update_mutex);
++                      up_write(&current->signal->exec_update_lock);
+               mutex_unlock(&current->signal->cred_guard_mutex);
+               abort_creds(bprm->cred);
+       }
+@@ -1500,7 +1500,7 @@ void install_exec_creds(struct linux_binprm *bprm)
+        * credentials; any time after this it may be unlocked.
+        */
+       security_bprm_committed_creds(bprm);
+-      mutex_unlock(&current->signal->exec_update_mutex);
++      up_write(&current->signal->exec_update_lock);
+       mutex_unlock(&current->signal->cred_guard_mutex);
+ }
+ EXPORT_SYMBOL(install_exec_creds);
+diff --git a/fs/proc/base.c b/fs/proc/base.c
+index b690074e65ffa..653c2d8aa1cd7 100644
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -403,11 +403,11 @@ print0:
+ static int lock_trace(struct task_struct *task)
+ {
+-      int err = mutex_lock_killable(&task->signal->exec_update_mutex);
++      int err = down_read_killable(&task->signal->exec_update_lock);
+       if (err)
+               return err;
+       if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
+-              mutex_unlock(&task->signal->exec_update_mutex);
++              up_read(&task->signal->exec_update_lock);
+               return -EPERM;
+       }
+       return 0;
+@@ -415,7 +415,7 @@ static int lock_trace(struct task_struct *task)
+ static void unlock_trace(struct task_struct *task)
+ {
+-      mutex_unlock(&task->signal->exec_update_mutex);
++      up_read(&task->signal->exec_update_lock);
+ }
+ #ifdef CONFIG_STACKTRACE
+@@ -2769,7 +2769,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
+       unsigned long flags;
+       int result;
+-      result = mutex_lock_killable(&task->signal->exec_update_mutex);
++      result = down_read_killable(&task->signal->exec_update_lock);
+       if (result)
+               return result;
+@@ -2805,7 +2805,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
+       result = 0;
+ out_unlock:
+-      mutex_unlock(&task->signal->exec_update_mutex);
++      up_read(&task->signal->exec_update_lock);
+       return result;
+ }
+diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
+index a29df79540ce6..baf58f4cb0578 100644
+--- a/include/linux/sched/signal.h
++++ b/include/linux/sched/signal.h
+@@ -226,12 +226,13 @@ struct signal_struct {
+                                        * credential calculations
+                                        * (notably. ptrace)
+                                        * Deprecated do not use in new code.
+-                                       * Use exec_update_mutex instead.
+-                                       */
+-      struct mutex exec_update_mutex; /* Held while task_struct is being
+-                                       * updated during exec, and may have
+-                                       * inconsistent permissions.
++                                       * Use exec_update_lock instead.
+                                        */
++      struct rw_semaphore exec_update_lock;   /* Held while task_struct is
++                                               * being updated during exec,
++                                               * and may have inconsistent
++                                               * permissions.
++                                               */
+ } __randomize_layout;
+ /*
+diff --git a/init/init_task.c b/init/init_task.c
+index bd403ed3e4184..df7041be96fca 100644
+--- a/init/init_task.c
++++ b/init/init_task.c
+@@ -26,7 +26,7 @@ static struct signal_struct init_signals = {
+       .multiprocess   = HLIST_HEAD_INIT,
+       .rlim           = INIT_RLIMITS,
+       .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
+-      .exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex),
++      .exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock),
+ #ifdef CONFIG_POSIX_TIMERS
+       .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
+       .cputimer       = {
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 18dbdf248ed81..2ef33e9a75910 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -1254,7 +1254,7 @@ static void put_ctx(struct perf_event_context *ctx)
+  * function.
+  *
+  * Lock order:
+- *    exec_update_mutex
++ *    exec_update_lock
+  *    task_struct::perf_event_mutex
+  *      perf_event_context::mutex
+  *        perf_event::child_mutex;
+@@ -11128,14 +11128,14 @@ SYSCALL_DEFINE5(perf_event_open,
+       }
+       if (task) {
+-              err = mutex_lock_interruptible(&task->signal->exec_update_mutex);
++              err = down_read_interruptible(&task->signal->exec_update_lock);
+               if (err)
+                       goto err_file;
+               /*
+                * Preserve ptrace permission check for backwards compatibility.
+                *
+-               * We must hold exec_update_mutex across this and any potential
++               * We must hold exec_update_lock across this and any potential
+                * perf_install_in_context() call for this new event to
+                * serialize against exec() altering our credentials (and the
+                * perf_event_exit_task() that could imply).
+@@ -11298,7 +11298,7 @@ SYSCALL_DEFINE5(perf_event_open,
+       mutex_unlock(&ctx->mutex);
+       if (task) {
+-              mutex_unlock(&task->signal->exec_update_mutex);
++              up_read(&task->signal->exec_update_lock);
+               put_task_struct(task);
+       }
+@@ -11322,7 +11322,7 @@ err_locked:
+       mutex_unlock(&ctx->mutex);
+ err_cred:
+       if (task)
+-              mutex_unlock(&task->signal->exec_update_mutex);
++              up_read(&task->signal->exec_update_lock);
+ err_file:
+       fput(event_file);
+ err_context:
+@@ -11639,7 +11639,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
+ /*
+  * When a child task exits, feed back event values to parent events.
+  *
+- * Can be called with exec_update_mutex held when called from
++ * Can be called with exec_update_lock held when called from
+  * install_exec_creds().
+  */
+ void perf_event_exit_task(struct task_struct *child)
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 419fff8eb9e55..50f37d5afb32b 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1221,7 +1221,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
+       struct mm_struct *mm;
+       int err;
+-      err =  mutex_lock_killable(&task->signal->exec_update_mutex);
++      err =  down_read_killable(&task->signal->exec_update_lock);
+       if (err)
+               return ERR_PTR(err);
+@@ -1231,7 +1231,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
+               mmput(mm);
+               mm = ERR_PTR(-EACCES);
+       }
+-      mutex_unlock(&task->signal->exec_update_mutex);
++      up_read(&task->signal->exec_update_lock);
+       return mm;
+ }
+@@ -1586,7 +1586,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
+       sig->oom_score_adj_min = current->signal->oom_score_adj_min;
+       mutex_init(&sig->cred_guard_mutex);
+-      mutex_init(&sig->exec_update_mutex);
++      init_rwsem(&sig->exec_update_lock);
+       return 0;
+ }
+diff --git a/kernel/kcmp.c b/kernel/kcmp.c
+index b3ff9288c6cc9..c0d2ad9b4705d 100644
+--- a/kernel/kcmp.c
++++ b/kernel/kcmp.c
+@@ -75,25 +75,25 @@ get_file_raw_ptr(struct task_struct *task, unsigned int idx)
+       return file;
+ }
+-static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
++static void kcmp_unlock(struct rw_semaphore *l1, struct rw_semaphore *l2)
+ {
+-      if (likely(m2 != m1))
+-              mutex_unlock(m2);
+-      mutex_unlock(m1);
++      if (likely(l2 != l1))
++              up_read(l2);
++      up_read(l1);
+ }
+-static int kcmp_lock(struct mutex *m1, struct mutex *m2)
++static int kcmp_lock(struct rw_semaphore *l1, struct rw_semaphore *l2)
+ {
+       int err;
+-      if (m2 > m1)
+-              swap(m1, m2);
++      if (l2 > l1)
++              swap(l1, l2);
+-      err = mutex_lock_killable(m1);
+-      if (!err && likely(m1 != m2)) {
+-              err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
++      err = down_read_killable(l1);
++      if (!err && likely(l1 != l2)) {
++              err = down_read_killable_nested(l2, SINGLE_DEPTH_NESTING);
+               if (err)
+-                      mutex_unlock(m1);
++                      up_read(l1);
+       }
+       return err;
+@@ -173,8 +173,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+       /*
+        * One should have enough rights to inspect task details.
+        */
+-      ret = kcmp_lock(&task1->signal->exec_update_mutex,
+-                      &task2->signal->exec_update_mutex);
++      ret = kcmp_lock(&task1->signal->exec_update_lock,
++                      &task2->signal->exec_update_lock);
+       if (ret)
+               goto err;
+       if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) ||
+@@ -229,8 +229,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+       }
+ err_unlock:
+-      kcmp_unlock(&task1->signal->exec_update_mutex,
+-                  &task2->signal->exec_update_mutex);
++      kcmp_unlock(&task1->signal->exec_update_lock,
++                  &task2->signal->exec_update_lock);
+ err:
+       put_task_struct(task1);
+       put_task_struct(task2);
+diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
+index a5eb87f2c5816..5d54ff3179b80 100644
+--- a/kernel/locking/rwsem.c
++++ b/kernel/locking/rwsem.c
+@@ -1516,7 +1516,7 @@ int __sched down_read_interruptible(struct rw_semaphore *sem)
+       rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
+       if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {
+-              rwsem_release(&sem->dep_map, _RET_IP_);
++              rwsem_release(&sem->dep_map, 1, _RET_IP_);
+               return -EINTR;
+       }
+@@ -1640,7 +1640,7 @@ int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
+       rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
+       if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
+-              rwsem_release(&sem->dep_map, _RET_IP_);
++              rwsem_release(&sem->dep_map, 1, _RET_IP_);
+               return -EINTR;
+       }
+-- 
+2.27.0
+
diff --git a/queue-5.4/fuse-fix-bad-inode.patch b/queue-5.4/fuse-fix-bad-inode.patch
new file mode 100644 (file)
index 0000000..28ba8e8
--- /dev/null
@@ -0,0 +1,402 @@
+From 78405a11e8ab15615234a0a92b0ff4b198d34a69 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Dec 2020 15:33:14 +0100
+Subject: fuse: fix bad inode
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+[ Upstream commit 5d069dbe8aaf2a197142558b6fb2978189ba3454 ]
+
+Jan Kara's analysis of the syzbot report (edited):
+
+  The reproducer opens a directory on FUSE filesystem, it then attaches
+  dnotify mark to the open directory.  After that a fuse_do_getattr() call
+  finds that attributes returned by the server are inconsistent, and calls
+  make_bad_inode() which, among other things does:
+
+          inode->i_mode = S_IFREG;
+
+  This then confuses dnotify which doesn't tear down its structures
+  properly and eventually crashes.
+
+Avoid calling make_bad_inode() on a live inode: switch to a private flag on
+the fuse inode.  Also add the test to ops which the bad_inode_ops would
+have caught.
+
+This bug goes back to the initial merge of fuse in 2.6.14...
+
+Reported-by: syzbot+f427adf9324b92652ccc@syzkaller.appspotmail.com
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Tested-by: Jan Kara <jack@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fuse/acl.c     |  6 ++++++
+ fs/fuse/dir.c     | 37 ++++++++++++++++++++++++++++++++-----
+ fs/fuse/file.c    | 19 +++++++++++--------
+ fs/fuse/fuse_i.h  | 12 ++++++++++++
+ fs/fuse/inode.c   |  4 ++--
+ fs/fuse/readdir.c |  4 ++--
+ fs/fuse/xattr.c   |  9 +++++++++
+ 7 files changed, 74 insertions(+), 17 deletions(-)
+
+diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
+index 5a48cee6d7d33..f529075a2ce87 100644
+--- a/fs/fuse/acl.c
++++ b/fs/fuse/acl.c
+@@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type)
+       void *value = NULL;
+       struct posix_acl *acl;
++      if (fuse_is_bad(inode))
++              return ERR_PTR(-EIO);
++
+       if (!fc->posix_acl || fc->no_getxattr)
+               return NULL;
+@@ -53,6 +56,9 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+       const char *name;
+       int ret;
++      if (fuse_is_bad(inode))
++              return -EIO;
++
+       if (!fc->posix_acl || fc->no_setxattr)
+               return -EOPNOTSUPP;
+diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
+index ee190119f45cc..60378f3baaae1 100644
+--- a/fs/fuse/dir.c
++++ b/fs/fuse/dir.c
+@@ -201,7 +201,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
+       int ret;
+       inode = d_inode_rcu(entry);
+-      if (inode && is_bad_inode(inode))
++      if (inode && fuse_is_bad(inode))
+               goto invalid;
+       else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
+                (flags & LOOKUP_REVAL)) {
+@@ -386,6 +386,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+       bool outarg_valid = true;
+       bool locked;
++      if (fuse_is_bad(dir))
++              return ERR_PTR(-EIO);
++
+       locked = fuse_lock_inode(dir);
+       err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
+                              &outarg, &inode);
+@@ -529,6 +532,9 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
+       struct fuse_conn *fc = get_fuse_conn(dir);
+       struct dentry *res = NULL;
++      if (fuse_is_bad(dir))
++              return -EIO;
++
+       if (d_in_lookup(entry)) {
+               res = fuse_lookup(dir, entry, 0);
+               if (IS_ERR(res))
+@@ -577,6 +583,9 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
+       int err;
+       struct fuse_forget_link *forget;
++      if (fuse_is_bad(dir))
++              return -EIO;
++
+       forget = fuse_alloc_forget();
+       if (!forget)
+               return -ENOMEM;
+@@ -704,6 +713,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
+       struct fuse_conn *fc = get_fuse_conn(dir);
+       FUSE_ARGS(args);
++      if (fuse_is_bad(dir))
++              return -EIO;
++
+       args.opcode = FUSE_UNLINK;
+       args.nodeid = get_node_id(dir);
+       args.in_numargs = 1;
+@@ -740,6 +752,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
+       struct fuse_conn *fc = get_fuse_conn(dir);
+       FUSE_ARGS(args);
++      if (fuse_is_bad(dir))
++              return -EIO;
++
+       args.opcode = FUSE_RMDIR;
+       args.nodeid = get_node_id(dir);
+       args.in_numargs = 1;
+@@ -818,6 +833,9 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
+       struct fuse_conn *fc = get_fuse_conn(olddir);
+       int err;
++      if (fuse_is_bad(olddir))
++              return -EIO;
++
+       if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+               return -EINVAL;
+@@ -953,7 +971,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
+       if (!err) {
+               if (fuse_invalid_attr(&outarg.attr) ||
+                   (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+-                      make_bad_inode(inode);
++                      fuse_make_bad(inode);
+                       err = -EIO;
+               } else {
+                       fuse_change_attributes(inode, &outarg.attr,
+@@ -1155,6 +1173,9 @@ static int fuse_permission(struct inode *inode, int mask)
+       bool refreshed = false;
+       int err = 0;
++      if (fuse_is_bad(inode))
++              return -EIO;
++
+       if (!fuse_allow_current_process(fc))
+               return -EACCES;
+@@ -1250,7 +1271,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
+       int err;
+       err = -EIO;
+-      if (is_bad_inode(inode))
++      if (fuse_is_bad(inode))
+               goto out_err;
+       if (fc->cache_symlinks)
+@@ -1298,7 +1319,7 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
+       struct fuse_conn *fc = get_fuse_conn(inode);
+       int err;
+-      if (is_bad_inode(inode))
++      if (fuse_is_bad(inode))
+               return -EIO;
+       if (fc->no_fsyncdir)
+@@ -1575,7 +1596,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
+       if (fuse_invalid_attr(&outarg.attr) ||
+           (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+-              make_bad_inode(inode);
++              fuse_make_bad(inode);
+               err = -EIO;
+               goto error;
+       }
+@@ -1631,6 +1652,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+       struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
+       int ret;
++      if (fuse_is_bad(inode))
++              return -EIO;
++
+       if (!fuse_allow_current_process(get_fuse_conn(inode)))
+               return -EACCES;
+@@ -1689,6 +1713,9 @@ static int fuse_getattr(const struct path *path, struct kstat *stat,
+       struct inode *inode = d_inode(path->dentry);
+       struct fuse_conn *fc = get_fuse_conn(inode);
++      if (fuse_is_bad(inode))
++              return -EIO;
++
+       if (!fuse_allow_current_process(fc))
+               return -EACCES;
+diff --git a/fs/fuse/file.c b/fs/fuse/file.c
+index ab4fc1255aca8..1e1aef1bc20b3 100644
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -222,6 +222,9 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
+                         fc->atomic_o_trunc &&
+                         fc->writeback_cache;
++      if (fuse_is_bad(inode))
++              return -EIO;
++
+       err = generic_file_open(inode, file);
+       if (err)
+               return err;
+@@ -443,7 +446,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
+       FUSE_ARGS(args);
+       int err;
+-      if (is_bad_inode(inode))
++      if (fuse_is_bad(inode))
+               return -EIO;
+       if (fc->no_flush)
+@@ -506,7 +509,7 @@ static int fuse_fsync(struct file *file, loff_t start, loff_t end,
+       struct fuse_conn *fc = get_fuse_conn(inode);
+       int err;
+-      if (is_bad_inode(inode))
++      if (fuse_is_bad(inode))
+               return -EIO;
+       inode_lock(inode);
+@@ -830,7 +833,7 @@ static int fuse_readpage(struct file *file, struct page *page)
+       int err;
+       err = -EIO;
+-      if (is_bad_inode(inode))
++      if (fuse_is_bad(inode))
+               goto out;
+       err = fuse_do_readpage(file, page);
+@@ -973,7 +976,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
+       int err;
+       err = -EIO;
+-      if (is_bad_inode(inode))
++      if (fuse_is_bad(inode))
+               goto out;
+       data.file = file;
+@@ -1569,7 +1572,7 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+       struct file *file = iocb->ki_filp;
+       struct fuse_file *ff = file->private_data;
+-      if (is_bad_inode(file_inode(file)))
++      if (fuse_is_bad(file_inode(file)))
+               return -EIO;
+       if (!(ff->open_flags & FOPEN_DIRECT_IO))
+@@ -1583,7 +1586,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+       struct file *file = iocb->ki_filp;
+       struct fuse_file *ff = file->private_data;
+-      if (is_bad_inode(file_inode(file)))
++      if (fuse_is_bad(file_inode(file)))
+               return -EIO;
+       if (!(ff->open_flags & FOPEN_DIRECT_IO))
+@@ -2133,7 +2136,7 @@ static int fuse_writepages(struct address_space *mapping,
+       int err;
+       err = -EIO;
+-      if (is_bad_inode(inode))
++      if (fuse_is_bad(inode))
+               goto out;
+       data.inode = inode;
+@@ -2911,7 +2914,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
+       if (!fuse_allow_current_process(fc))
+               return -EACCES;
+-      if (is_bad_inode(inode))
++      if (fuse_is_bad(inode))
+               return -EIO;
+       return fuse_do_ioctl(file, cmd, arg, flags);
+diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
+index d7cde216fc871..e3688312e9f1b 100644
+--- a/fs/fuse/fuse_i.h
++++ b/fs/fuse/fuse_i.h
+@@ -158,6 +158,8 @@ enum {
+       FUSE_I_INIT_RDPLUS,
+       /** An operation changing file size is in progress  */
+       FUSE_I_SIZE_UNSTABLE,
++      /* Bad inode */
++      FUSE_I_BAD,
+ };
+ struct fuse_conn;
+@@ -787,6 +789,16 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
+       return atomic64_read(&fc->attr_version);
+ }
++static inline void fuse_make_bad(struct inode *inode)
++{
++      set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
++}
++
++static inline bool fuse_is_bad(struct inode *inode)
++{
++      return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state));
++}
++
+ /** Device operations */
+ extern const struct file_operations fuse_dev_operations;
+diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
+index f58ab84b09fb3..aa1d5cf1bc3a4 100644
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -115,7 +115,7 @@ static void fuse_evict_inode(struct inode *inode)
+               fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
+               fi->forget = NULL;
+       }
+-      if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
++      if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
+               WARN_ON(!list_empty(&fi->write_files));
+               WARN_ON(!list_empty(&fi->queued_writes));
+       }
+@@ -306,7 +306,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
+               unlock_new_inode(inode);
+       } else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
+               /* Inode has changed type, any I/O on the old should fail */
+-              make_bad_inode(inode);
++              fuse_make_bad(inode);
+               iput(inode);
+               goto retry;
+       }
+diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
+index 6a40f75a0d25e..70f685b61e3a5 100644
+--- a/fs/fuse/readdir.c
++++ b/fs/fuse/readdir.c
+@@ -207,7 +207,7 @@ retry:
+                       dput(dentry);
+                       goto retry;
+               }
+-              if (is_bad_inode(inode)) {
++              if (fuse_is_bad(inode)) {
+                       dput(dentry);
+                       return -EIO;
+               }
+@@ -568,7 +568,7 @@ int fuse_readdir(struct file *file, struct dir_context *ctx)
+       struct inode *inode = file_inode(file);
+       int err;
+-      if (is_bad_inode(inode))
++      if (fuse_is_bad(inode))
+               return -EIO;
+       mutex_lock(&ff->readdir.lock);
+diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
+index 20d052e08b3be..28fed52957707 100644
+--- a/fs/fuse/xattr.c
++++ b/fs/fuse/xattr.c
+@@ -113,6 +113,9 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
+       struct fuse_getxattr_out outarg;
+       ssize_t ret;
++      if (fuse_is_bad(inode))
++              return -EIO;
++
+       if (!fuse_allow_current_process(fc))
+               return -EACCES;
+@@ -178,6 +181,9 @@ static int fuse_xattr_get(const struct xattr_handler *handler,
+                        struct dentry *dentry, struct inode *inode,
+                        const char *name, void *value, size_t size)
+ {
++      if (fuse_is_bad(inode))
++              return -EIO;
++
+       return fuse_getxattr(inode, name, value, size);
+ }
+@@ -186,6 +192,9 @@ static int fuse_xattr_set(const struct xattr_handler *handler,
+                         const char *name, const void *value, size_t size,
+                         int flags)
+ {
++      if (fuse_is_bad(inode))
++              return -EIO;
++
+       if (!value)
+               return fuse_removexattr(inode, name);
+-- 
+2.27.0
+
diff --git a/queue-5.4/perf-break-deadlock-involving-exec_update_mutex.patch b/queue-5.4/perf-break-deadlock-involving-exec_update_mutex.patch
new file mode 100644 (file)
index 0000000..4bf19e6
--- /dev/null
@@ -0,0 +1,115 @@
+From a764af288b588a2081944c7713756bb9825a07df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Aug 2020 14:37:20 +0200
+Subject: perf: Break deadlock involving exec_update_mutex
+
+From: peterz@infradead.org <peterz@infradead.org>
+
+[ Upstream commit 78af4dc949daaa37b3fcd5f348f373085b4e858f ]
+
+Syzbot reported a lock inversion involving perf. The sore point being
+perf holding exec_update_mutex() for a very long time, specifically
+across a whole bunch of filesystem ops in pmu::event_init() (uprobes)
+and anon_inode_getfile().
+
+This then inverts against procfs code trying to take
+exec_update_mutex.
+
+Move the permission checks later, such that we need to hold the mutex
+over less code.
+
+Reported-by: syzbot+db9cdf3dd1f64252c6ef@syzkaller.appspotmail.com
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/core.c | 46 ++++++++++++++++++++++----------------------
+ 1 file changed, 23 insertions(+), 23 deletions(-)
+
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 9f7c2da992991..18dbdf248ed81 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -11001,24 +11001,6 @@ SYSCALL_DEFINE5(perf_event_open,
+               goto err_task;
+       }
+-      if (task) {
+-              err = mutex_lock_interruptible(&task->signal->exec_update_mutex);
+-              if (err)
+-                      goto err_task;
+-
+-              /*
+-               * Reuse ptrace permission checks for now.
+-               *
+-               * We must hold exec_update_mutex across this and any potential
+-               * perf_install_in_context() call for this new event to
+-               * serialize against exec() altering our credentials (and the
+-               * perf_event_exit_task() that could imply).
+-               */
+-              err = -EACCES;
+-              if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+-                      goto err_cred;
+-      }
+-
+       if (flags & PERF_FLAG_PID_CGROUP)
+               cgroup_fd = pid;
+@@ -11026,7 +11008,7 @@ SYSCALL_DEFINE5(perf_event_open,
+                                NULL, NULL, cgroup_fd);
+       if (IS_ERR(event)) {
+               err = PTR_ERR(event);
+-              goto err_cred;
++              goto err_task;
+       }
+       if (is_sampling_event(event)) {
+@@ -11145,6 +11127,24 @@ SYSCALL_DEFINE5(perf_event_open,
+               goto err_context;
+       }
++      if (task) {
++              err = mutex_lock_interruptible(&task->signal->exec_update_mutex);
++              if (err)
++                      goto err_file;
++
++              /*
++               * Preserve ptrace permission check for backwards compatibility.
++               *
++               * We must hold exec_update_mutex across this and any potential
++               * perf_install_in_context() call for this new event to
++               * serialize against exec() altering our credentials (and the
++               * perf_event_exit_task() that could imply).
++               */
++              err = -EACCES;
++              if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
++                      goto err_cred;
++      }
++
+       if (move_group) {
+               gctx = __perf_event_ctx_lock_double(group_leader, ctx);
+@@ -11320,7 +11320,10 @@ err_locked:
+       if (move_group)
+               perf_event_ctx_unlock(group_leader, gctx);
+       mutex_unlock(&ctx->mutex);
+-/* err_file: */
++err_cred:
++      if (task)
++              mutex_unlock(&task->signal->exec_update_mutex);
++err_file:
+       fput(event_file);
+ err_context:
+       perf_unpin_context(ctx);
+@@ -11332,9 +11335,6 @@ err_alloc:
+        */
+       if (!event_file)
+               free_event(event);
+-err_cred:
+-      if (task)
+-              mutex_unlock(&task->signal->exec_update_mutex);
+ err_task:
+       if (task)
+               put_task_struct(task);
+-- 
+2.27.0
+
diff --git a/queue-5.4/rwsem-implement-down_read_interruptible.patch b/queue-5.4/rwsem-implement-down_read_interruptible.patch
new file mode 100644 (file)
index 0000000..dcf538b
--- /dev/null
@@ -0,0 +1,83 @@
+From e21b8174136ef3273be0492e181566c8ae55a055 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Dec 2020 14:11:13 -0600
+Subject: rwsem: Implement down_read_interruptible
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+[ Upstream commit 31784cff7ee073b34d6eddabb95e3be2880a425c ]
+
+In preparation for converting exec_update_mutex to a rwsem so that
+multiple readers can execute in parallel and not deadlock, add
+down_read_interruptible.  This is needed for perf_event_open to be
+converted (with no semantic changes) from working on a mutex to
+working on a rwsem.
+
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/87k0tybqfy.fsf@x220.int.ebiederm.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/rwsem.h  |  1 +
+ kernel/locking/rwsem.c | 26 ++++++++++++++++++++++++++
+ 2 files changed, 27 insertions(+)
+
+diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
+index c91ac00d1ff8c..8a3606372abc8 100644
+--- a/include/linux/rwsem.h
++++ b/include/linux/rwsem.h
+@@ -125,6 +125,7 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem)
+  * lock for reading
+  */
+ extern void down_read(struct rw_semaphore *sem);
++extern int __must_check down_read_interruptible(struct rw_semaphore *sem);
+ extern int __must_check down_read_killable(struct rw_semaphore *sem);
+ /*
+diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
+index 2ce13f9585779..a5eb87f2c5816 100644
+--- a/kernel/locking/rwsem.c
++++ b/kernel/locking/rwsem.c
+@@ -1348,6 +1348,18 @@ inline void __down_read(struct rw_semaphore *sem)
+       }
+ }
++static inline int __down_read_interruptible(struct rw_semaphore *sem)
++{
++      if (!rwsem_read_trylock(sem)) {
++              if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_INTERRUPTIBLE)))
++                      return -EINTR;
++              DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
++      } else {
++              rwsem_set_reader_owned(sem);
++      }
++      return 0;
++}
++
+ static inline int __down_read_killable(struct rw_semaphore *sem)
+ {
+       if (!rwsem_read_trylock(sem)) {
+@@ -1498,6 +1510,20 @@ void __sched down_read(struct rw_semaphore *sem)
+ }
+ EXPORT_SYMBOL(down_read);
++int __sched down_read_interruptible(struct rw_semaphore *sem)
++{
++      might_sleep();
++      rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
++
++      if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {
++              rwsem_release(&sem->dep_map, _RET_IP_);
++              return -EINTR;
++      }
++
++      return 0;
++}
++EXPORT_SYMBOL(down_read_interruptible);
++
+ int __sched down_read_killable(struct rw_semaphore *sem)
+ {
+       might_sleep();
+-- 
+2.27.0
+
diff --git a/queue-5.4/rwsem-implement-down_read_killable_nested.patch b/queue-5.4/rwsem-implement-down_read_killable_nested.patch
new file mode 100644 (file)
index 0000000..55ad35d
--- /dev/null
@@ -0,0 +1,71 @@
+From 80190d76a4650da7dce1a5f89f2f239f96abf72d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Dec 2020 14:10:32 -0600
+Subject: rwsem: Implement down_read_killable_nested
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+[ Upstream commit 0f9368b5bf6db0c04afc5454b1be79022a681615 ]
+
+In preparation for converting exec_update_mutex to a rwsem so that
+multiple readers can execute in parallel and not deadlock, add
+down_read_killable_nested.  This is needed so that kcmp_lock
+can be converted from working on mutexes to working on rw_semaphores.
+
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/87o8jabqh3.fsf@x220.int.ebiederm.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/rwsem.h  |  2 ++
+ kernel/locking/rwsem.c | 14 ++++++++++++++
+ 2 files changed, 16 insertions(+)
+
+diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
+index 00d6054687dd2..c91ac00d1ff8c 100644
+--- a/include/linux/rwsem.h
++++ b/include/linux/rwsem.h
+@@ -173,6 +173,7 @@ extern void downgrade_write(struct rw_semaphore *sem);
+  * See Documentation/locking/lockdep-design.rst for more details.)
+  */
+ extern void down_read_nested(struct rw_semaphore *sem, int subclass);
++extern int __must_check down_read_killable_nested(struct rw_semaphore *sem, int subclass);
+ extern void down_write_nested(struct rw_semaphore *sem, int subclass);
+ extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass);
+ extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock);
+@@ -193,6 +194,7 @@ extern void down_read_non_owner(struct rw_semaphore *sem);
+ extern void up_read_non_owner(struct rw_semaphore *sem);
+ #else
+ # define down_read_nested(sem, subclass)              down_read(sem)
++# define down_read_killable_nested(sem, subclass)     down_read_killable(sem)
+ # define down_write_nest_lock(sem, nest_lock) down_write(sem)
+ # define down_write_nested(sem, subclass)     down_write(sem)
+ # define down_write_killable_nested(sem, subclass)    down_write_killable(sem)
+diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
+index baafa1dd9fcc4..2ce13f9585779 100644
+--- a/kernel/locking/rwsem.c
++++ b/kernel/locking/rwsem.c
+@@ -1608,6 +1608,20 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
+ }
+ EXPORT_SYMBOL(down_read_nested);
++int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
++{
++      might_sleep();
++      rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
++
++      if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
++              rwsem_release(&sem->dep_map, _RET_IP_);
++              return -EINTR;
++      }
++
++      return 0;
++}
++EXPORT_SYMBOL(down_read_killable_nested);
++
+ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
+ {
+       might_sleep();
+-- 
+2.27.0
+
index 48948f0a675ca33c5a40399c720c4969f8d5fddf..30f5c86a44df4b69ae2af0083f8178edcff8b01f 100644 (file)
@@ -5,3 +5,8 @@ dmaengine-at_hdmac-add-missing-put_device-call-in-at_dma_xlate.patch
 dmaengine-at_hdmac-add-missing-kfree-call-in-at_dma_xlate.patch
 kdev_t-always-inline-major-minor-helper-functions.patch
 iio-imu-bmi160-fix-alignment-and-data-leak-issues.patch
+fuse-fix-bad-inode.patch
+perf-break-deadlock-involving-exec_update_mutex.patch
+rwsem-implement-down_read_killable_nested.patch
+rwsem-implement-down_read_interruptible.patch
+exec-transform-exec_update_mutex-into-a-rw_semaphore.patch