__setup("initramfs_options=", initramfs_options_setup);
+bool nullfs_rootfs = false;
+
+static int __init nullfs_rootfs_setup(char *str)
+{
+ if (*str)
+ return 0;
+ nullfs_rootfs = true;
+ return 1;
+}
+__setup("nullfs_rootfs", nullfs_rootfs_setup);
+
static u64 event;
static DEFINE_XARRAY_FLAGS(mnt_id_xa, XA_FLAGS_ALLOC);
static DEFINE_IDA(mnt_group_ida);
* pointed to by put_old must yield the same directory as new_root. No other
* file system may be mounted on put_old. After all, new_root is a mountpoint.
*
- * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
- * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
+ * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem
+ * unless the kernel was booted with "nullfs_rootfs". See
+ * Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
* in this situation.
*
* Notes:
static void __init init_mount_tree(void)
{
- struct vfsmount *mnt;
- struct mount *m;
+ struct vfsmount *mnt, *nullfs_mnt;
+ struct mount *mnt_root;
struct path root;
+ /*
+ * When nullfs is used, we create two mounts:
+ *
+ * (1) nullfs with mount id 1
+ * (2) mutable rootfs with mount id 2
+ *
+ * with (2) mounted on top of (1).
+ */
+ if (nullfs_rootfs) {
+ nullfs_mnt = vfs_kern_mount(&nullfs_fs_type, 0, "nullfs", NULL);
+ if (IS_ERR(nullfs_mnt))
+ panic("VFS: Failed to create nullfs");
+ }
+
mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
if (IS_ERR(mnt))
panic("Can't create rootfs");
- m = real_mount(mnt);
- init_mnt_ns.root = m;
- init_mnt_ns.nr_mounts = 1;
- mnt_add_to_ns(&init_mnt_ns, m);
+ if (nullfs_rootfs) {
+ VFS_WARN_ON_ONCE(real_mount(nullfs_mnt)->mnt_id != 1);
+ VFS_WARN_ON_ONCE(real_mount(mnt)->mnt_id != 2);
+
+ /* The namespace root is the nullfs mnt. */
+ mnt_root = real_mount(nullfs_mnt);
+ init_mnt_ns.root = mnt_root;
+
+ /* Mount mutable rootfs on top of nullfs. */
+ root.mnt = nullfs_mnt;
+ root.dentry = nullfs_mnt->mnt_root;
+
+ LOCK_MOUNT_EXACT(mp, &root);
+ if (unlikely(IS_ERR(mp.parent)))
+ panic("VFS: Failed to mount rootfs on nullfs");
+ scoped_guard(mount_writer)
+ attach_mnt(real_mount(mnt), mp.parent, mp.mp);
+
+ pr_info("VFS: Finished mounting rootfs on nullfs\n");
+ } else {
+ VFS_WARN_ON_ONCE(real_mount(mnt)->mnt_id != 1);
+
+ /* The namespace root is the mutable rootfs. */
+ mnt_root = real_mount(mnt);
+ init_mnt_ns.root = mnt_root;
+ }
+
+ /*
+ * We've dropped all locks here but that's fine. Not just are we
+ * the only task that's running, there's no other mount
+ * namespace in existence and the initial mount namespace is
+ * completely empty until we add the mounts we just created.
+ */
+ for (struct mount *p = mnt_root; p; p = next_mnt(p, mnt_root)) {
+ mnt_add_to_ns(&init_mnt_ns, p);
+ init_mnt_ns.nr_mounts++;
+ }
+
init_task.nsproxy->mnt_ns = &init_mnt_ns;
get_mnt_ns(&init_mnt_ns);
- root.mnt = mnt;
- root.dentry = mnt->mnt_root;
-
+ /* The root and pwd always point to the mutable rootfs. */
+ root.mnt = mnt;
+ root.dentry = mnt->mnt_root;
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
+#include <linux/fs/super_types.h>
+#include <linux/fs_context.h>
+#include <linux/magic.h>
+
+static const struct super_operations nullfs_super_operations = {
+ .statfs = simple_statfs,
+};
+
+static int nullfs_fs_fill_super(struct super_block *s, struct fs_context *fc)
+{
+ struct inode *inode;
+
+ s->s_maxbytes = MAX_LFS_FILESIZE;
+ s->s_blocksize = PAGE_SIZE;
+ s->s_blocksize_bits = PAGE_SHIFT;
+ s->s_magic = NULL_FS_MAGIC;
+ s->s_op = &nullfs_super_operations;
+ s->s_export_op = NULL;
+ s->s_xattr = NULL;
+ s->s_time_gran = 1;
+ s->s_d_flags = 0;
+
+ inode = new_inode(s);
+ if (!inode)
+ return -ENOMEM;
+
+ /* nullfs is permanently empty... */
+ make_empty_dir_inode(inode);
+ simple_inode_init_ts(inode);
+ inode->i_ino = 1;
+ /* ... and immutable. */
+ inode->i_flags |= S_IMMUTABLE;
+
+ s->s_root = d_make_root(inode);
+ if (!s->s_root)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * For now this is a single global instance. If needed we can make it
+ * mountable by userspace at which point we will need to make it
+ * multi-instance.
+ */
+static int nullfs_fs_get_tree(struct fs_context *fc)
+{
+ return get_tree_single(fc, nullfs_fs_fill_super);
+}
+
+static const struct fs_context_operations nullfs_fs_context_ops = {
+ .get_tree = nullfs_fs_get_tree,
+};
+
+static int nullfs_init_fs_context(struct fs_context *fc)
+{
+ fc->ops = &nullfs_fs_context_ops;
+ fc->global = true;
+ fc->sb_flags = SB_NOUSER;
+ fc->s_iflags = SB_I_NOEXEC | SB_I_NODEV;
+ return 0;
+}
+
+struct file_system_type nullfs_fs_type = {
+ .name = "nullfs",
+ .init_fs_context = nullfs_init_fs_context,
+ .kill_sb = kill_anon_super,
+};