git.ipfire.org Git - thirdparty/linux.git/commitdiff
bdev: open block device as files
author: Christian Brauner <brauner@kernel.org>
Thu, 8 Feb 2024 17:47:35 +0000 (18:47 +0100)
committer: Christian Brauner <brauner@kernel.org>
Sun, 25 Feb 2024 11:05:21 +0000 (12:05 +0100)
Add two new helpers to allow opening block devices as files.
This is not the final infrastructure. This still opens the block device
before opening a struct file. Until we have removed all references to
struct bdev_handle we can't switch the order:

* Introduce blk_to_file_flags() to translate from block-specific flags
  to flags usable to open a new file.
* Introduce bdev_file_open_by_{dev,path}().
* Introduce temporary sb_bdev_handle() helper to retrieve a struct
  bdev_handle from a block device file and update places that directly
  reference struct bdev_handle to rely on it.
* Don't count block device opens against the number of open files. A
  bdev_file_open_by_{dev,path}() file is never installed into any
  file descriptor table.

One idea that came to mind was to use kernel_tmpfile_open() which
would require us to pass a path and it would then call do_dentry_open()
going through the regular fops->open::blkdev_open() path. But then we're
back to the problem of routing block specific flags such as
BLK_OPEN_RESTRICT_WRITES through the open path and would have to waste
FMODE_* flags every time we add a new one. With this we can avoid using
a flag bit and we have more leeway in how we open block devices from
bdev_open_by_{dev,path}().

Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-1-adbd023e19cc@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
block/bdev.c
fs/cramfs/inode.c
fs/f2fs/super.c
fs/jfs/jfs_logmgr.c
fs/romfs/super.c
fs/super.c
fs/xfs/xfs_super.c
include/linux/blkdev.h
include/linux/fs.h

index e9f1b12bd75c7b0d4b2964995e8fbf70ac3c5c8e..e1149652c53285d7483318b3d367e762d7bf79c0 100644 (file)
@@ -49,6 +49,13 @@ struct block_device *I_BDEV(struct inode *inode)
 }
 EXPORT_SYMBOL(I_BDEV);
 
+struct block_device *file_bdev(struct file *bdev_file)
+{
+       struct bdev_handle *handle = bdev_file->private_data;
+       return handle->bdev;
+}
+EXPORT_SYMBOL(file_bdev);
+
 static void bdev_write_inode(struct block_device *bdev)
 {
        struct inode *inode = bdev->bd_inode;
@@ -368,12 +375,12 @@ static struct file_system_type bd_type = {
 };
 
 struct super_block *blockdev_superblock __ro_after_init;
+struct vfsmount *blockdev_mnt __ro_after_init;
 EXPORT_SYMBOL_GPL(blockdev_superblock);
 
 void __init bdev_cache_init(void)
 {
        int err;
-       static struct vfsmount *bd_mnt __ro_after_init;
 
        bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
                        0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
@@ -382,10 +389,10 @@ void __init bdev_cache_init(void)
        err = register_filesystem(&bd_type);
        if (err)
                panic("Cannot register bdev pseudo-fs");
-       bd_mnt = kern_mount(&bd_type);
-       if (IS_ERR(bd_mnt))
+       blockdev_mnt = kern_mount(&bd_type);
+       if (IS_ERR(blockdev_mnt))
                panic("Cannot create bdev pseudo-fs");
-       blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
+       blockdev_superblock = blockdev_mnt->mnt_sb;   /* For writeback */
 }
 
 struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
@@ -911,6 +918,92 @@ free_handle:
 }
 EXPORT_SYMBOL(bdev_open_by_dev);
 
+/*
+ * If BLK_OPEN_WRITE_IOCTL is set then this is a historical quirk
+ * associated with the floppy driver where it has allowed ioctls if the
+ * file was opened for writing, but does not allow reads or writes.
+ * Make sure that this quirk is reflected in @f_flags.
+ *
+ * It can also happen if a block device is opened as O_RDWR | O_WRONLY.
+ */
+static unsigned blk_to_file_flags(blk_mode_t mode)
+{
+       unsigned int flags = 0;
+
+       if ((mode & (BLK_OPEN_READ | BLK_OPEN_WRITE)) ==
+           (BLK_OPEN_READ | BLK_OPEN_WRITE))
+               flags |= O_RDWR;
+       else if (mode & BLK_OPEN_WRITE_IOCTL)
+               flags |= O_RDWR | O_WRONLY;
+       else if (mode & BLK_OPEN_WRITE)
+               flags |= O_WRONLY;
+       else if (mode & BLK_OPEN_READ)
+               flags |= O_RDONLY; /* homeopathic, because O_RDONLY is 0 */
+       else
+               WARN_ON_ONCE(true);
+
+       if (mode & BLK_OPEN_NDELAY)
+               flags |= O_NDELAY;
+
+       return flags;
+}
+
+struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
+                                  const struct blk_holder_ops *hops)
+{
+       struct file *bdev_file;
+       struct bdev_handle *handle;
+       unsigned int flags;
+
+       handle = bdev_open_by_dev(dev, mode, holder, hops);
+       if (IS_ERR(handle))
+               return ERR_CAST(handle);
+
+       flags = blk_to_file_flags(mode);
+       bdev_file = alloc_file_pseudo_noaccount(handle->bdev->bd_inode,
+                       blockdev_mnt, "", flags | O_LARGEFILE, &def_blk_fops);
+       if (IS_ERR(bdev_file)) {
+               bdev_release(handle);
+               return bdev_file;
+       }
+       ihold(handle->bdev->bd_inode);
+
+       bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
+       if (bdev_nowait(handle->bdev))
+               bdev_file->f_mode |= FMODE_NOWAIT;
+
+       bdev_file->f_mapping = handle->bdev->bd_inode->i_mapping;
+       bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
+       bdev_file->private_data = handle;
+       return bdev_file;
+}
+EXPORT_SYMBOL(bdev_file_open_by_dev);
+
+struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
+                                   void *holder,
+                                   const struct blk_holder_ops *hops)
+{
+       struct file *bdev_file;
+       dev_t dev;
+       int error;
+
+       error = lookup_bdev(path, &dev);
+       if (error)
+               return ERR_PTR(error);
+
+       bdev_file = bdev_file_open_by_dev(dev, mode, holder, hops);
+       if (!IS_ERR(bdev_file) && (mode & BLK_OPEN_WRITE)) {
+               struct bdev_handle *handle = bdev_file->private_data;
+               if (bdev_read_only(handle->bdev)) {
+                       fput(bdev_file);
+                       bdev_file = ERR_PTR(-EACCES);
+               }
+       }
+
+       return bdev_file;
+}
+EXPORT_SYMBOL(bdev_file_open_by_path);
+
 /**
  * bdev_open_by_path - open a block device by name
  * @path: path to the block device to open
index 60dbfa0f880514d2bb5ce155a94109db5f34087f..39e75131fd5aa01d732f703cb1f421a3696bffd6 100644 (file)
@@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb)
                sb->s_mtd = NULL;
        } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
                sync_blockdev(sb->s_bdev);
-               bdev_release(sb->s_bdev_handle);
+               fput(sb->s_bdev_file);
        }
        kfree(sbi);
 }
index d45ab0992ae5947e6f89628e8e8829c548645d26..ea94c148fee56679aff3bb1aeb4a47694c39e5a8 100644 (file)
@@ -4247,7 +4247,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
 
        for (i = 0; i < max_devices; i++) {
                if (i == 0)
-                       FDEV(0).bdev_handle = sbi->sb->s_bdev_handle;
+                       FDEV(0).bdev_handle = sb_bdev_handle(sbi->sb);
                else if (!RDEV(i).path[0])
                        break;
 
index cb6d1fda66a7021a9ce5b42959122be9ad1934b2..8691463956d17a52bf6a9ed5ce5582ab50ecf067 100644 (file)
@@ -1162,7 +1162,7 @@ static int open_inline_log(struct super_block *sb)
        init_waitqueue_head(&log->syncwait);
 
        set_bit(log_INLINELOG, &log->flag);
-       log->bdev_handle = sb->s_bdev_handle;
+       log->bdev_handle = sb_bdev_handle(sb);
        log->base = addressPXD(&JFS_SBI(sb)->logpxd);
        log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
            (L2LOGPSIZE - sb->s_blocksize_bits);
index 545ad44f96b89148f1fc122d17f5a4b4e1a66deb..1ed468c035579eccf8f6a37277ab340a458a7370 100644 (file)
@@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb)
 #ifdef CONFIG_ROMFS_ON_BLOCK
        if (sb->s_bdev) {
                sync_blockdev(sb->s_bdev);
-               bdev_release(sb->s_bdev_handle);
+               fput(sb->s_bdev_file);
        }
 #endif
 }
index d35e852954892dadcf1df6757c8b491904d2edbb..08dcc3371aa09e9149ba5c8eb6d544175ef7fbda 100644 (file)
@@ -1532,16 +1532,16 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
                struct fs_context *fc)
 {
        blk_mode_t mode = sb_open_mode(sb_flags);
-       struct bdev_handle *bdev_handle;
+       struct file *bdev_file;
        struct block_device *bdev;
 
-       bdev_handle = bdev_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
-       if (IS_ERR(bdev_handle)) {
+       bdev_file = bdev_file_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
+       if (IS_ERR(bdev_file)) {
                if (fc)
                        errorf(fc, "%s: Can't open blockdev", fc->source);
-               return PTR_ERR(bdev_handle);
+               return PTR_ERR(bdev_file);
        }
-       bdev = bdev_handle->bdev;
+       bdev = file_bdev(bdev_file);
 
        /*
         * This really should be in blkdev_get_by_dev, but right now can't due
@@ -1549,7 +1549,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
         * writable from userspace even for a read-only block device.
         */
        if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
-               bdev_release(bdev_handle);
+               fput(bdev_file);
                return -EACCES;
        }
 
@@ -1560,11 +1560,11 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
        if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
                if (fc)
                        warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
-               bdev_release(bdev_handle);
+               fput(bdev_file);
                return -EBUSY;
        }
        spin_lock(&sb_lock);
-       sb->s_bdev_handle = bdev_handle;
+       sb->s_bdev_file = bdev_file;
        sb->s_bdev = bdev;
        sb->s_bdi = bdi_get(bdev->bd_disk->bdi);
        if (bdev_stable_writes(bdev))
@@ -1680,7 +1680,7 @@ void kill_block_super(struct super_block *sb)
        generic_shutdown_super(sb);
        if (bdev) {
                sync_blockdev(bdev);
-               bdev_release(sb->s_bdev_handle);
+               fput(sb->s_bdev_file);
        }
 }
 
index aff20ddd4a9f9cdeeeca1f54f210d19462773a5b..e5ac0e59ede9de46ccb881fc4057af13cd0dd3ed 100644 (file)
@@ -467,7 +467,7 @@ xfs_open_devices(
         * Setup xfs_mount buffer target pointers
         */
        error = -ENOMEM;
-       mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_handle);
+       mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb_bdev_handle(sb));
        if (!mp->m_ddev_targp)
                goto out_close_rtdev;
 
index 99e4f5e722132c2c4f301816bbab7871f2f2ccb0..76706aa473163dd573a3da1418993ea03d193c73 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/sbitmap.h>
 #include <linux/uuid.h>
 #include <linux/xarray.h>
+#include <linux/file.h>
 
 struct module;
 struct request_queue;
@@ -1474,6 +1475,7 @@ extern const struct blk_holder_ops fs_holder_ops;
        (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \
         (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE))
 
+/* @bdev_handle will be removed soon. */
 struct bdev_handle {
        struct block_device *bdev;
        void *holder;
@@ -1484,6 +1486,10 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
                const struct blk_holder_ops *hops);
 struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode,
                void *holder, const struct blk_holder_ops *hops);
+struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
+               const struct blk_holder_ops *hops);
+struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
+               void *holder, const struct blk_holder_ops *hops);
 int bd_prepare_to_claim(struct block_device *bdev, void *holder,
                const struct blk_holder_ops *hops);
 void bd_abort_claiming(struct block_device *bdev, void *holder);
@@ -1494,6 +1500,7 @@ struct block_device *blkdev_get_no_open(dev_t dev);
 void blkdev_put_no_open(struct block_device *bdev);
 
 struct block_device *I_BDEV(struct inode *inode);
+struct block_device *file_bdev(struct file *bdev_file);
 
 #ifdef CONFIG_BLOCK
 void invalidate_bdev(struct block_device *bdev);
index ed5966a70495129be1d6729eed2918240db62df1..e9291e27cc47f3fc84a8d72be415296244601e6e 100644 (file)
@@ -1228,8 +1228,8 @@ struct super_block {
 #endif
        struct hlist_bl_head    s_roots;        /* alternate root dentries for NFS */
        struct list_head        s_mounts;       /* list of mounts; _not_ for fs use */
-       struct block_device     *s_bdev;
-       struct bdev_handle      *s_bdev_handle;
+       struct block_device     *s_bdev;        /* can go away once we use an accessor for @s_bdev_file */
+       struct file             *s_bdev_file;
        struct backing_dev_info *s_bdi;
        struct mtd_info         *s_mtd;
        struct hlist_node       s_instances;
@@ -1327,6 +1327,12 @@ struct super_block {
        struct list_head        s_inodes_wb;    /* writeback inodes */
 } __randomize_layout;
 
+/* Temporary helper that will go away. */
+static inline struct bdev_handle *sb_bdev_handle(struct super_block *sb)
+{
+       return sb->s_bdev_file->private_data;
+}
+
 static inline struct user_namespace *i_user_ns(const struct inode *inode)
 {
        return inode->i_sb->s_user_ns;