git.ipfire.org Git - thirdparty/linux.git/commitdiff
fs: try an opportunistic lookup for O_CREAT opens too
author Jeff Layton <jlayton@kernel.org>
Wed, 7 Aug 2024 12:10:27 +0000 (08:10 -0400)
committer Christian Brauner <brauner@kernel.org>
Fri, 30 Aug 2024 06:22:34 +0000 (08:22 +0200)
Today, when opening a file we'll typically do a fast lookup, but if
O_CREAT is set, the kernel always takes the exclusive inode lock. I
assume this was done with the expectation that O_CREAT means that we
always expect to do the create, but that's often not the case. Many
programs set O_CREAT even in scenarios where the file already exists.

This patch rearranges the pathwalk-for-open code to also attempt a
lookup_fast in certain O_CREAT cases. If a positive dentry is found, the
inode_lock can be avoided altogether, and if auditing isn't enabled, the
walk can stay in rcuwalk mode for the last step_into.

One notable exception that is hopefully temporary: if we're doing an
rcuwalk and auditing is enabled, skip the lookup_fast. Legitimizing the
dentry in that case is more expensive than taking the i_rwsem for now.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20240807-openfast-v3-1-040d132d2559@kernel.org
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/namei.c

index c3459785704d3c216ee42b907362c777b41529df..3e34f4d97d83d3901ac9c3833f83a2b0a0a93e19 100644 (file)
@@ -3605,6 +3605,49 @@ out_dput:
        return ERR_PTR(error);
 }
 
+static inline bool trailing_slashes(struct nameidata *nd)
+{
+       return (bool)nd->last.name[nd->last.len];
+}
+
+static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag)
+{
+       struct dentry *dentry;
+
+       if (open_flag & O_CREAT) {
+               /* Don't bother on an O_EXCL create */
+               if (open_flag & O_EXCL)
+                       return NULL;
+
+               /*
+                * FIXME: If auditing is enabled, then we'll have to unlazy to
+                * use the dentry. For now, don't do this, since it shifts
+                * contention from parent's i_rwsem to its d_lockref spinlock.
+                * Reconsider this once dentry refcounting handles heavy
+                * contention better.
+                */
+               if ((nd->flags & LOOKUP_RCU) && !audit_dummy_context())
+                       return NULL;
+       }
+
+       if (trailing_slashes(nd))
+               nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+
+       dentry = lookup_fast(nd);
+       if (IS_ERR_OR_NULL(dentry))
+               return dentry;
+
+       if (open_flag & O_CREAT) {
+               /* Discard negative dentries. Need inode_lock to do the create */
+               if (!dentry->d_inode) {
+                       if (!(nd->flags & LOOKUP_RCU))
+                               dput(dentry);
+                       dentry = NULL;
+               }
+       }
+       return dentry;
+}
+
 static const char *open_last_lookups(struct nameidata *nd,
                   struct file *file, const struct open_flags *op)
 {
@@ -3622,28 +3665,39 @@ static const char *open_last_lookups(struct nameidata *nd,
                return handle_dots(nd, nd->last_type);
        }
 
+       /* We _can_ be in RCU mode here */
+       dentry = lookup_fast_for_open(nd, open_flag);
+       if (IS_ERR(dentry))
+               return ERR_CAST(dentry);
+
        if (!(open_flag & O_CREAT)) {
-               if (nd->last.name[nd->last.len])
-                       nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
-               /* we _can_ be in RCU mode here */
-               dentry = lookup_fast(nd);
-               if (IS_ERR(dentry))
-                       return ERR_CAST(dentry);
                if (likely(dentry))
                        goto finish_lookup;
 
                if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU))
                        return ERR_PTR(-ECHILD);
        } else {
-               /* create side of things */
                if (nd->flags & LOOKUP_RCU) {
-                       if (!try_to_unlazy(nd))
+                       bool unlazied;
+
+                       /* can stay in rcuwalk if not auditing */
+                       if (dentry && audit_dummy_context()) {
+                               if (trailing_slashes(nd))
+                                       return ERR_PTR(-EISDIR);
+                               goto finish_lookup;
+                       }
+                       unlazied = dentry ? try_to_unlazy_next(nd, dentry) :
+                                           try_to_unlazy(nd);
+                       if (!unlazied)
                                return ERR_PTR(-ECHILD);
                }
                audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
-               /* trailing slashes? */
-               if (unlikely(nd->last.name[nd->last.len]))
+               if (trailing_slashes(nd)) {
+                       dput(dentry);
                        return ERR_PTR(-EISDIR);
+               }
+               if (dentry)
+                       goto finish_lookup;
        }
 
        if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {