--- /dev/null
+From e386dfc56f837da66d00a078e5314bc8382fab83 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Fri, 10 Dec 2021 14:00:15 -0800
+Subject: fget: clarify and improve __fget_files() implementation
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit e386dfc56f837da66d00a078e5314bc8382fab83 upstream.
+
+Commit 054aa8d439b9 ("fget: check that the fd still exists after getting
+a ref to it") fixed a race with getting a reference to a file just as it
+was being closed. It was a fairly minimal patch, and I didn't think
+re-checking the file pointer lookup would be a measurable overhead,
+since it was all right there and cached.
+
+But I was wrong, as pointed out by the kernel test robot.
+
+The 'poll2' case of the will-it-scale.per_thread_ops benchmark regressed
+quite noticeably. Admittedly it seems to be a very artificial test:
+doing "poll()" system calls on regular files in a very tight loop in
+multiple threads.
+
+That means that basically all the time is spent just looking up file
+descriptors without ever doing anything useful with them (not that doing
+'poll()' on a regular file is useful to begin with). And as a result it
+shows the extra "re-check fd" cost as a sore thumb.
+
+Happily, the regression is fixable by just writing the code to loook up
+the fd to be better and clearer. There's still a cost to verify the
+file pointer, but now it's basically in the noise even for that
+benchmark that does nothing else - and the code is more understandable
+and has better comments too.
+
+[ Side note: this patch is also a classic case of one that looks very
+ messy with the default greedy Myers diff - it's much more legible with
+ either the patience of histogram diff algorithm ]
+
+Link: https://lore.kernel.org/lkml/20211210053743.GA36420@xsang-OptiPlex-9020/
+Link: https://lore.kernel.org/lkml/20211213083154.GA20853@linux.intel.com/
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Tested-by: Carel Si <beibei.si@intel.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/file.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 56 insertions(+), 16 deletions(-)
+
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -841,28 +841,68 @@ void do_close_on_exec(struct files_struc
+ spin_unlock(&files->file_lock);
+ }
+
+-static struct file *__fget_files(struct files_struct *files, unsigned int fd,
+- fmode_t mask, unsigned int refs)
++static inline struct file *__fget_files_rcu(struct files_struct *files,
++ unsigned int fd, fmode_t mask, unsigned int refs)
+ {
+- struct file *file;
++ for (;;) {
++ struct file *file;
++ struct fdtable *fdt = rcu_dereference_raw(files->fdt);
++ struct file __rcu **fdentry;
+
+- rcu_read_lock();
+-loop:
+- file = files_lookup_fd_rcu(files, fd);
+- if (file) {
+- /* File object ref couldn't be taken.
+- * dup2() atomicity guarantee is the reason
+- * we loop to catch the new file (or NULL pointer)
++ if (unlikely(fd >= fdt->max_fds))
++ return NULL;
++
++ fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
++ file = rcu_dereference_raw(*fdentry);
++ if (unlikely(!file))
++ return NULL;
++
++ if (unlikely(file->f_mode & mask))
++ return NULL;
++
++ /*
++ * Ok, we have a file pointer. However, because we do
++ * this all locklessly under RCU, we may be racing with
++ * that file being closed.
++ *
++ * Such a race can take two forms:
++ *
++ * (a) the file ref already went down to zero,
++ * and get_file_rcu_many() fails. Just try
++ * again:
+ */
+- if (file->f_mode & mask)
+- file = NULL;
+- else if (!get_file_rcu_many(file, refs))
+- goto loop;
+- else if (files_lookup_fd_raw(files, fd) != file) {
++ if (unlikely(!get_file_rcu_many(file, refs)))
++ continue;
++
++ /*
++ * (b) the file table entry has changed under us.
++ * Note that we don't need to re-check the 'fdt->fd'
++ * pointer having changed, because it always goes
++ * hand-in-hand with 'fdt'.
++ *
++ * If so, we need to put our refs and try again.
++ */
++ if (unlikely(rcu_dereference_raw(files->fdt) != fdt) ||
++ unlikely(rcu_dereference_raw(*fdentry) != file)) {
+ fput_many(file, refs);
+- goto loop;
++ continue;
+ }
++
++ /*
++ * Ok, we have a ref to the file, and checked that it
++ * still exists.
++ */
++ return file;
+ }
++}
++
++static struct file *__fget_files(struct files_struct *files, unsigned int fd,
++ fmode_t mask, unsigned int refs)
++{
++ struct file *file;
++
++ rcu_read_lock();
++ file = __fget_files_rcu(files, fd, mask, refs);
+ rcu_read_unlock();
+
+ return file;