]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
Improve __fget_files_rcu() code generation (and thus __fget_light())
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 26 Nov 2023 20:24:38 +0000 (12:24 -0800)
committerChristian Brauner <brauner@kernel.org>
Tue, 12 Dec 2023 13:24:13 +0000 (14:24 +0100)
Commit 0ede61d8589c ("file: convert to SLAB_TYPESAFE_BY_RCU") caused a
performance regression as reported by the kernel test robot.

The __fget_light() function is one of those critical ones for some
loads, and the code generation was unnecessarily impacted.  Let's just
write that function to better.

Reported-by: kernel test robot <oliver.sang@intel.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jann Horn <jannh@google.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Closes: https://lore.kernel.org/oe-lkp/202311201406.2022ca3f-oliver.sang@intel.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/CAHk-=wiCJtLbFWNURB34b9a_R_unaH3CiMRXfkR0-iihB_z68A@mail.gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/file.c
include/linux/fdtable.h

index 5fb0b146e79ec0f7263a8dd98ec81a305b42cfb1..50df31e104a59c20f377b0e1aead8473195ccbf9 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
@@ -959,31 +959,45 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
                struct file *file;
                struct fdtable *fdt = rcu_dereference_raw(files->fdt);
                struct file __rcu **fdentry;
+               unsigned long nospec_mask;
 
-               if (unlikely(fd >= fdt->max_fds))
-                       return NULL;
-
-               fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
+               /* Mask is a 0 for invalid fd's, ~0 for valid ones */
+               nospec_mask = array_index_mask_nospec(fd, fdt->max_fds);
 
                /*
-                * Ok, we have a file pointer. However, because we do
-                * this all locklessly under RCU, we may be racing with
-                * that file being closed.
-                *
-                * Such a race can take two forms:
-                *
-                *  (a) the file ref already went down to zero and the
-                *      file hasn't been reused yet or the file count
-                *      isn't zero but the file has already been reused.
+                * fdentry points to the 'fd' offset, or fdt->fd[0].
+                * Loading from fdt->fd[0] is always safe, because the
+                * array always exists.
                 */
-               file = __get_file_rcu(fdentry);
+               fdentry = fdt->fd + (fd & nospec_mask);
+
+               /* Do the load, then mask any invalid result */
+               file = rcu_dereference_raw(*fdentry);
+               file = (void *)(nospec_mask & (unsigned long)file);
                if (unlikely(!file))
                        return NULL;
 
-               if (unlikely(IS_ERR(file)))
+               /*
+                * Ok, we have a file pointer that was valid at
+                * some point, but it might have become stale since.
+                *
+                * We need to confirm it by incrementing the refcount
+                * and then check the lookup again.
+                *
+                * atomic_long_inc_not_zero() gives us a full memory
+                * barrier. We only really need an 'acquire' one to
+                * protect the loads below, but we don't have that.
+                */
+               if (unlikely(!atomic_long_inc_not_zero(&file->f_count)))
                        continue;
 
                /*
+                * Such a race can take two forms:
+                *
+                *  (a) the file ref already went down to zero and the
+                *      file hasn't been reused yet or the file count
+                *      isn't zero but the file has already been reused.
+                *
                 *  (b) the file table entry has changed under us.
                 *       Note that we don't need to re-check the 'fdt->fd'
                 *       pointer having changed, because it always goes
@@ -991,7 +1005,8 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
                 *
                 * If so, we need to put our ref and try again.
                 */
-               if (unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
+               if (unlikely(file != rcu_dereference_raw(*fdentry)) ||
+                   unlikely(rcu_dereference_raw(files->fdt) != fdt)) {
                        fput(file);
                        continue;
                }
@@ -1128,13 +1143,13 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
         * atomic_read_acquire() pairs with atomic_dec_and_test() in
         * put_files_struct().
         */
-       if (atomic_read_acquire(&files->count) == 1) {
+       if (likely(atomic_read_acquire(&files->count) == 1)) {
                file = files_lookup_fd_raw(files, fd);
                if (!file || unlikely(file->f_mode & mask))
                        return 0;
                return (unsigned long)file;
        } else {
-               file = __fget(fd, mask);
+               file = __fget_files(files, fd, mask);
                if (!file)
                        return 0;
                return FDPUT_FPUT | (unsigned long)file;
index bc4c3287a65ef7b72d28103e7e5ae6fefd0a77c2..80bd7789bab1533d7953c41769922ae07074fe2f 100644 (file)
@@ -83,12 +83,17 @@ struct dentry;
 static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
 {
        struct fdtable *fdt = rcu_dereference_raw(files->fdt);
-
-       if (fd < fdt->max_fds) {
-               fd = array_index_nospec(fd, fdt->max_fds);
-               return rcu_dereference_raw(fdt->fd[fd]);
-       }
-       return NULL;
+       unsigned long mask = array_index_mask_nospec(fd, fdt->max_fds);
+       struct file *needs_masking;
+
+       /*
+        * 'mask' is zero for an out-of-bounds fd, all ones for ok.
+        * 'fd&mask' is 'fd' for ok, or 0 for out of bounds.
+        *
+        * Accessing fdt->fd[0] is ok, but needs masking of the result.
+        */
+       needs_masking = rcu_dereference_raw(fdt->fd[fd&mask]);
+       return (struct file *)(mask & (unsigned long)needs_masking);
 }
 
 static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd)