git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
kernel/acct.c: saner struct file treatment
author Al Viro <viro@zeniv.linux.org.uk>
Sun, 6 Jul 2025 20:38:13 +0000 (16:38 -0400)
committer Al Viro <viro@zeniv.linux.org.uk>
Sun, 28 Sep 2025 00:13:56 +0000 (20:13 -0400)
        Instead of switching ->f_path.mnt of an opened file to internal
clone, get a struct path with ->mnt set to internal clone of that
->f_path.mnt, then dentry_open() that to get the file with right ->f_path.mnt
from the very beginning.

The only subtle part here is that on failure exits we need to
close the file with __fput_sync() and make sure we do that *before*
dropping the original mount.

With that done, only fs/{file_table,open,namei}.c ever store
anything to file->f_path and only prior to file->f_mode & FMODE_OPENED
becoming true.  Analysis of mount write count handling also becomes
less brittle and convoluted...

[AV: folded a fix for a bug spotted by Jan Kara - we do need a full-blown
open of the original file, not just user_path_at() or we end up skipping
permission checks]

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
kernel/acct.c

index 6520baa13669391174b4b86430649e4e90321834..61630110e29dfc4ea43f333a4aed89017aec8559 100644 (file)
  * a struct file opened for write. Fixed. 2/6/2000, AV.
  */
 
-#include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/acct.h>
 #include <linux/capability.h>
-#include <linux/file.h>
 #include <linux/tty.h>
-#include <linux/security.h>
-#include <linux/vfs.h>
+#include <linux/statfs.h>
 #include <linux/jiffies.h>
-#include <linux/times.h>
 #include <linux/syscalls.h>
-#include <linux/mount.h>
-#include <linux/uaccess.h>
+#include <linux/namei.h>
 #include <linux/sched/cputime.h>
 
 #include <asm/div64.h>
@@ -217,84 +212,70 @@ static void close_work(struct work_struct *work)
        complete(&acct->done);
 }
 
-static int acct_on(struct filename *pathname)
+DEFINE_FREE(fput_sync, struct file *, if (!IS_ERR_OR_NULL(_T)) __fput_sync(_T))
+static int acct_on(const char __user *name)
 {
-       struct file *file;
-       struct vfsmount *mnt, *internal;
+       /* Difference from BSD - they don't do O_APPEND */
+       const int open_flags = O_WRONLY|O_APPEND|O_LARGEFILE;
        struct pid_namespace *ns = task_active_pid_ns(current);
+       struct filename *pathname __free(putname) = getname(name);
+       struct file *original_file __free(fput) = NULL; // in that order
+       struct path internal __free(path_put) = {};     // in that order
+       struct file *file __free(fput_sync) = NULL;     // in that order
        struct bsd_acct_struct *acct;
+       struct vfsmount *mnt;
        struct fs_pin *old;
-       int err;
 
-       acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
-       if (!acct)
-               return -ENOMEM;
+       if (IS_ERR(pathname))
+               return PTR_ERR(pathname);
+       original_file = file_open_name(pathname, open_flags, 0);
+       if (IS_ERR(original_file))
+               return PTR_ERR(original_file);
 
-       /* Difference from BSD - they don't do O_APPEND */
-       file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
-       if (IS_ERR(file)) {
-               kfree(acct);
+       mnt = mnt_clone_internal(&original_file->f_path);
+       if (IS_ERR(mnt))
+               return PTR_ERR(mnt);
+
+       internal.mnt = mnt;
+       internal.dentry = dget(mnt->mnt_root);
+
+       file = dentry_open(&internal, open_flags, current_cred());
+       if (IS_ERR(file))
                return PTR_ERR(file);
-       }
 
-       if (!S_ISREG(file_inode(file)->i_mode)) {
-               kfree(acct);
-               filp_close(file, NULL);
+       if (!S_ISREG(file_inode(file)->i_mode))
                return -EACCES;
-       }
 
        /* Exclude kernel kernel internal filesystems. */
-       if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) {
-               kfree(acct);
-               filp_close(file, NULL);
+       if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT))
                return -EINVAL;
-       }
 
        /* Exclude procfs and sysfs. */
-       if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) {
-               kfree(acct);
-               filp_close(file, NULL);
+       if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE)
                return -EINVAL;
-       }
 
-       if (!(file->f_mode & FMODE_CAN_WRITE)) {
-               kfree(acct);
-               filp_close(file, NULL);
+       if (!(file->f_mode & FMODE_CAN_WRITE))
                return -EIO;
-       }
-       internal = mnt_clone_internal(&file->f_path);
-       if (IS_ERR(internal)) {
-               kfree(acct);
-               filp_close(file, NULL);
-               return PTR_ERR(internal);
-       }
-       err = mnt_get_write_access(internal);
-       if (err) {
-               mntput(internal);
-               kfree(acct);
-               filp_close(file, NULL);
-               return err;
-       }
-       mnt = file->f_path.mnt;
-       file->f_path.mnt = internal;
+
+       acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+       if (!acct)
+               return -ENOMEM;
 
        atomic_long_set(&acct->count, 1);
        init_fs_pin(&acct->pin, acct_pin_kill);
-       acct->file = file;
+       acct->file = no_free_ptr(file);
        acct->needcheck = jiffies;
        acct->ns = ns;
        mutex_init(&acct->lock);
        INIT_WORK(&acct->work, close_work);
        init_completion(&acct->done);
        mutex_lock_nested(&acct->lock, 1);      /* nobody has seen it yet */
-       pin_insert(&acct->pin, mnt);
+       pin_insert(&acct->pin, original_file->f_path.mnt);
 
        rcu_read_lock();
        old = xchg(&ns->bacct, &acct->pin);
        mutex_unlock(&acct->lock);
        pin_kill(old);
-       mnt_put_write_access(mnt);
-       mntput(mnt);
        return 0;
 }
 
@@ -319,14 +300,9 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
                return -EPERM;
 
        if (name) {
-               struct filename *tmp = getname(name);
-
-               if (IS_ERR(tmp))
-                       return PTR_ERR(tmp);
                mutex_lock(&acct_on_mutex);
-               error = acct_on(tmp);
+               error = acct_on(name);
                mutex_unlock(&acct_on_mutex);
-               putname(tmp);
        } else {
                rcu_read_lock();
                pin_kill(task_active_pid_ns(current)->bacct);