domain_id=%s Specify a trusted domain ID for fscache mode so that
different images with the same blobs, identified by blob IDs,
can share storage within the same trusted domain.
+ It is also used when inode page sharing is enabled, so that
+ filesystems within the same trusted domain can share their
+ page cache.
fsoffset=%llu Specify block-aligned filesystem offset for the primary device.
+inode_share Enable inode page sharing for this filesystem. Inodes with
+ identical content under the same domain ID can share the
+ page cache.
=================== =========================================================
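+
+For instance, with an image that carries the on-disk ishare xattrs, page
+cache sharing could be enabled as follows (device, mount point and
+domain name are illustrative)::
+
+	mount -t erofs -o domain_id=mydomain,inode_share /dev/loop0 /mnt/erofs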
Sysfs Entries
erofs-$(CONFIG_EROFS_FS_ZIP_ACCEL) += decompressor_crypto.o
erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
+erofs-$(CONFIG_EROFS_FS_PAGE_CACHE_SHARE) += ishare.o
#define EROFS_MOUNT_DAX_ALWAYS 0x00000040
#define EROFS_MOUNT_DAX_NEVER 0x00000080
#define EROFS_MOUNT_DIRECT_IO 0x00000100
+#define EROFS_MOUNT_INODE_SHARE 0x00000200
#define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option)
#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option)
/* default readahead size of directories */
#define EROFS_DIR_RA_BYTES 16384
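+/* an opaque fingerprint identifying inode content for page cache sharing */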
+struct erofs_inode_fingerprint {
+ u8 *opaque;
+ int size;
+};
+
struct erofs_inode {
erofs_nid_t nid;
};
#endif /* CONFIG_EROFS_FS_ZIP */
};
+#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
+ struct list_head ishare_list;
+ union {
+ /* for anonymous shared inodes */
+ struct {
+ struct erofs_inode_fingerprint fingerprint;
+ spinlock_t ishare_lock;
+ };
+ /* for real inodes bound to a shared inode */
+ struct inode *sharedinode;
+ };
+#endif
/* the corresponding vfs inode */
struct inode vfs_inode;
};
extern const struct file_operations erofs_file_fops;
extern const struct file_operations erofs_dir_fops;
+extern const struct file_operations erofs_ishare_fops;
extern const struct iomap_ops z_erofs_iomap_report_ops;
static inline void erofs_fscache_submit_bio(struct bio *bio) {}
#endif
+#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
+int __init erofs_init_ishare(void);
+void erofs_exit_ishare(void);
+bool erofs_ishare_fill_inode(struct inode *inode);
+void erofs_ishare_free_inode(struct inode *inode);
+#else
+static inline int erofs_init_ishare(void) { return 0; }
+static inline void erofs_exit_ishare(void) {}
+static inline bool erofs_ishare_fill_inode(struct inode *inode) { return false; }
+static inline void erofs_ishare_free_inode(struct inode *inode) {}
+#endif
+
long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
long erofs_compat_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2024, Alibaba Cloud
+ */
+#include <linux/xxhash.h>
+#include <linux/mount.h>
+#include "internal.h"
+#include "xattr.h"
+
+#include "../internal.h"
+
+static struct vfsmount *erofs_ishare_mnt;
+
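+/* iget5 test/set callbacks: two shared inodes match iff their fingerprints match */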
+static int erofs_ishare_iget5_eq(struct inode *inode, void *data)
+{
+ struct erofs_inode_fingerprint *fp1 = &EROFS_I(inode)->fingerprint;
+ struct erofs_inode_fingerprint *fp2 = data;
+
+ return fp1->size == fp2->size &&
+ !memcmp(fp1->opaque, fp2->opaque, fp2->size);
+}
+
+static int erofs_ishare_iget5_set(struct inode *inode, void *data)
+{
+ struct erofs_inode *vi = EROFS_I(inode);
+
+ vi->fingerprint = *(struct erofs_inode_fingerprint *)data;
+ INIT_LIST_HEAD(&vi->ishare_list);
+ spin_lock_init(&vi->ishare_lock);
+ return 0;
+}
+
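+/*
+ * Bind @inode to an anonymous shared inode keyed by its content
+ * fingerprint: inodes with identical fingerprints resolve to the same
+ * shared inode and therefore share one page cache.
+ */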
+bool erofs_ishare_fill_inode(struct inode *inode)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+ struct erofs_inode *vi = EROFS_I(inode);
+ struct erofs_inode_fingerprint fp;
+ struct inode *sharedinode;
+ unsigned long hash;
+
+ if (erofs_xattr_fill_inode_fingerprint(&fp, inode, sbi->domain_id))
+ return false;
+ hash = xxh32(fp.opaque, fp.size, 0);
+ sharedinode = iget5_locked(erofs_ishare_mnt->mnt_sb, hash,
+ erofs_ishare_iget5_eq, erofs_ishare_iget5_set,
+ &fp);
+ if (!sharedinode) {
+ kfree(fp.opaque);
+ return false;
+ }
+
+ if (inode_state_read_once(sharedinode) & I_NEW) {
+ if (erofs_inode_set_aops(sharedinode, inode, true)) {
+ iget_failed(sharedinode);
+ kfree(fp.opaque);
+ return false;
+ }
+ sharedinode->i_size = vi->vfs_inode.i_size;
+ unlock_new_inode(sharedinode);
+ } else {
+ kfree(fp.opaque);
+ if (sharedinode->i_size != vi->vfs_inode.i_size) {
+ _erofs_printk(inode->i_sb, KERN_WARNING
+ "size(%lld:%lld) not matches for the same fingerprint\n",
+ vi->vfs_inode.i_size, sharedinode->i_size);
+ iput(sharedinode);
+ return false;
+ }
+ }
+ vi->sharedinode = sharedinode;
+ INIT_LIST_HEAD(&vi->ishare_list);
+ spin_lock(&EROFS_I(sharedinode)->ishare_lock);
+ list_add(&vi->ishare_list, &EROFS_I(sharedinode)->ishare_list);
+ spin_unlock(&EROFS_I(sharedinode)->ishare_lock);
+ return true;
+}
+
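+/* detach @inode from its shared inode and drop the reference taken at bind time */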
+void erofs_ishare_free_inode(struct inode *inode)
+{
+ struct erofs_inode *vi = EROFS_I(inode);
+ struct inode *sharedinode = vi->sharedinode;
+
+ if (!sharedinode)
+ return;
+ spin_lock(&EROFS_I(sharedinode)->ishare_lock);
+ list_del(&vi->ishare_list);
+ spin_unlock(&EROFS_I(sharedinode)->ishare_lock);
+ iput(sharedinode);
+ vi->sharedinode = NULL;
+}
+
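+/*
+ * Wrap the shared inode in an empty backing file on open, so that reads
+ * and mmaps are served from the shared page cache while the user-visible
+ * path still refers to the file actually opened.
+ */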
+static int erofs_ishare_file_open(struct inode *inode, struct file *file)
+{
+ struct inode *sharedinode = EROFS_I(inode)->sharedinode;
+ struct file *realfile;
+
+ if (file->f_flags & O_DIRECT)
+ return -EINVAL;
+ realfile = alloc_empty_backing_file(O_RDONLY|O_NOATIME, current_cred());
+ if (IS_ERR(realfile))
+ return PTR_ERR(realfile);
+ ihold(sharedinode);
+ realfile->f_op = &erofs_file_fops;
+ realfile->f_inode = sharedinode;
+ realfile->f_mapping = sharedinode->i_mapping;
+ path_get(&file->f_path);
+ backing_file_set_user_path(realfile, &file->f_path);
+
+ file_ra_state_init(&realfile->f_ra, file->f_mapping);
+ realfile->private_data = EROFS_I(inode);
+ file->private_data = realfile;
+ return 0;
+}
+
+static int erofs_ishare_file_release(struct inode *inode, struct file *file)
+{
+ struct file *realfile = file->private_data;
+
+ iput(realfile->f_inode);
+ fput(realfile);
+ file->private_data = NULL;
+ return 0;
+}
+
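+/* serve buffered reads from the shared inode by cloning the kiocb onto the backing file */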
+static ssize_t erofs_ishare_file_read_iter(struct kiocb *iocb,
+ struct iov_iter *to)
+{
+ struct file *realfile = iocb->ki_filp->private_data;
+ struct kiocb dedup_iocb;
+ ssize_t nread;
+
+ if (!iov_iter_count(to))
+ return 0;
+ kiocb_clone(&dedup_iocb, iocb, realfile);
+ nread = filemap_read(&dedup_iocb, to, 0);
+ iocb->ki_pos = dedup_iocb.ki_pos;
+ return nread;
+}
+
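+/* map the backing file so that page faults are satisfied from the shared page cache */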
+static int erofs_ishare_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct file *realfile = file->private_data;
+
+ vma_set_file(vma, realfile);
+ return generic_file_readonly_mmap(file, vma);
+}
+
+const struct file_operations erofs_ishare_fops = {
+ .open = erofs_ishare_file_open,
+ .llseek = generic_file_llseek,
+ .read_iter = erofs_ishare_file_read_iter,
+ .mmap = erofs_ishare_mmap,
+ .release = erofs_ishare_file_release,
+ .get_unmapped_area = thp_get_unmapped_area,
+ .splice_read = filemap_splice_read,
+};
+
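+/*
+ * All shared anonymous inodes live on a single kernel-internal mount of
+ * erofs_anon_fs_type, set up once at module initialization.
+ */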
+int __init erofs_init_ishare(void)
+{
+ erofs_ishare_mnt = kern_mount(&erofs_anon_fs_type);
+ return PTR_ERR_OR_ZERO(erofs_ishare_mnt);
+}
+
+void erofs_exit_ishare(void)
+{
+ kern_unmount(erofs_ishare_mnt);
+}
enum {
Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, Opt_fsoffset,
+ Opt_inode_share,
};
static const struct constant_table erofs_param_cache_strategy[] = {
fsparam_string("domain_id", Opt_domain_id),
fsparam_flag_no("directio", Opt_directio),
fsparam_u64("fsoffset", Opt_fsoffset),
+ fsparam_flag("inode_share", Opt_inode_share),
{}
};
if (!sbi->fsid)
return -ENOMEM;
break;
+#endif
+#if defined(CONFIG_EROFS_FS_ONDEMAND) || defined(CONFIG_EROFS_FS_PAGE_CACHE_SHARE)
case Opt_domain_id:
kfree_sensitive(sbi->domain_id);
sbi->domain_id = no_free_ptr(param->string);
case Opt_fsoffset:
sbi->dif0.fsoff = result.uint_64;
break;
+ case Opt_inode_share:
+#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
+ set_opt(&sbi->opt, INODE_SHARE);
+#else
+ errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
+#endif
+ break;
}
return 0;
}
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_op = &erofs_sops;
+ if (!sbi->domain_id && test_opt(&sbi->opt, INODE_SHARE)) {
+ errorfc(fc, "domain_id is needed when inode_ishare is on");
+ return -EINVAL;
+ }
+ if (test_opt(&sbi->opt, DAX_ALWAYS) && test_opt(&sbi->opt, INODE_SHARE)) {
+ errorfc(fc, "FSDAX is not allowed when inode_ishare is on");
+ return -EINVAL;
+ }
+
sbi->blkszbits = PAGE_SHIFT;
if (!sb->s_bdev) {
/*
erofs_info(sb, "unsupported blocksize for DAX");
clear_opt(&sbi->opt, DAX_ALWAYS);
}
+ if (test_opt(&sbi->opt, INODE_SHARE) && !erofs_sb_has_ishare_xattrs(sbi)) {
+ erofs_info(sb, "on-disk ishare xattrs not found. Turning off inode_share.");
+ clear_opt(&sbi->opt, INODE_SHARE);
+ }
+ if (test_opt(&sbi->opt, INODE_SHARE))
+ erofs_info(sb, "EXPERIMENTAL EROFS page cache share support in use. Use at your own risk!");
sb->s_time_gran = 1;
sb->s_xattr = erofs_xattr_handlers;
};
MODULE_ALIAS_FS("erofs");
-#if defined(CONFIG_EROFS_FS_ONDEMAND)
+#if defined(CONFIG_EROFS_FS_ONDEMAND) || defined(CONFIG_EROFS_FS_PAGE_CACHE_SHARE)
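+/* anonymous inodes may carry a fingerprint buffer that must be freed as well */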
+static void erofs_free_anon_inode(struct inode *inode)
+{
+ struct erofs_inode *vi = EROFS_I(inode);
+
+#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
+ kfree(vi->fingerprint.opaque);
+#endif
+ kmem_cache_free(erofs_inode_cachep, vi);
+}
+
+static const struct super_operations erofs_anon_sops = {
+ .alloc_inode = erofs_alloc_inode,
+ .drop_inode = inode_just_drop,
+ .free_inode = erofs_free_anon_inode,
+};
+
static int erofs_anon_init_fs_context(struct fs_context *fc)
{
- return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
+ struct pseudo_fs_context *ctx;
+
+ ctx = init_pseudo(fc, EROFS_SUPER_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->ops = &erofs_anon_sops;
+ return 0;
}
struct file_system_type erofs_anon_fs_type = {
if (err)
goto sysfs_err;
+ err = erofs_init_ishare();
+ if (err)
+ goto ishare_err;
+
err = register_filesystem(&erofs_fs_type);
if (err)
goto fs_err;
return 0;
fs_err:
+ erofs_exit_ishare();
+ishare_err:
erofs_exit_sysfs();
sysfs_err:
z_erofs_exit_subsystem();
/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
rcu_barrier();
+ erofs_exit_ishare();
erofs_exit_sysfs();
z_erofs_exit_subsystem();
erofs_exit_shrinker();
#endif
if (sbi->dif0.fsoff)
seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff);
+ if (test_opt(opt, INODE_SHARE))
+ seq_puts(seq, ",inode_share");
return 0;
}
dax_break_layout_final(inode);
#endif
+ erofs_ishare_free_inode(inode);
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
}
return acl;
}
#endif
+
+#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
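+/*
+ * Build the content fingerprint from the on-disk ishare xattr value,
+ * suffixed with the domain_id so that sharing never crosses trusted
+ * domains.
+ */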
+int erofs_xattr_fill_inode_fingerprint(struct erofs_inode_fingerprint *fp,
+ struct inode *inode, const char *domain_id)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+ struct erofs_xattr_prefix_item *prefix;
+ const char *infix;
+ int valuelen, base_index;
+
+ if (!test_opt(&sbi->opt, INODE_SHARE))
+ return -EOPNOTSUPP;
+ if (!sbi->xattr_prefixes)
+ return -EINVAL;
+ prefix = sbi->xattr_prefixes + sbi->ishare_xattr_prefix_id;
+ infix = prefix->prefix->infix;
+ base_index = prefix->prefix->base_index;
+ valuelen = erofs_getxattr(inode, base_index, infix, NULL, 0);
+ if (valuelen <= 0 || valuelen > (1 << sbi->blkszbits))
+ return -EFSCORRUPTED;
+ fp->size = valuelen + (domain_id ? strlen(domain_id) : 0);
+ fp->opaque = kmalloc(fp->size, GFP_KERNEL);
+ if (!fp->opaque)
+ return -ENOMEM;
+ if (valuelen != erofs_getxattr(inode, base_index, infix,
+ fp->opaque, valuelen)) {
+ kfree(fp->opaque);
+ fp->opaque = NULL;
+ return -EFSCORRUPTED;
+ }
+ if (domain_id)
+	memcpy(fp->opaque + valuelen, domain_id, fp->size - valuelen);
+ return 0;
+}
+#endif
#define erofs_get_acl (NULL)
#endif
+int erofs_xattr_fill_inode_fingerprint(struct erofs_inode_fingerprint *fp,
+ struct inode *inode, const char *domain_id);
+
#endif