git.ipfire.org Git - thirdparty/linux.git/commitdiff
Merge tag 'vfs-7.1-rc1.bh.metadata' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 13 Apr 2026 19:46:42 +0000 (12:46 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 13 Apr 2026 19:46:42 +0000 (12:46 -0700)
Pull vfs buffer_head updates from Christian Brauner:
 "This cleans up the mess that has accumulated over the years in
  metadata buffer_head tracking for inodes.

  It moves the tracking into a dedicated structure in the filesystem-private
  part of the inode (so that we don't use private_list, private_data,
  and private_lock in struct address_space), and also moves a couple of other
  users of private_data and private_list so these are removed from
  struct address_space, saving 3 longs in struct inode for 99% of inodes"

* tag 'vfs-7.1-rc1.bh.metadata' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (42 commits)
  fs: Drop i_private_list from address_space
  fs: Drop mapping_metadata_bhs from address space
  ext4: Track metadata bhs in fs-private inode part
  minix: Track metadata bhs in fs-private inode part
  udf: Track metadata bhs in fs-private inode part
  fat: Track metadata bhs in fs-private inode part
  bfs: Track metadata bhs in fs-private inode part
  affs: Track metadata bhs in fs-private inode part
  ext2: Track metadata bhs in fs-private inode part
  fs: Provide functions for handling mapping_metadata_bhs directly
  fs: Switch inode_has_buffers() to take mapping_metadata_bhs
  fs: Make bhs point to mapping_metadata_bhs
  fs: Move metadata bhs tracking to a separate struct
  fs: Fold fsync_buffers_list() into sync_mapping_buffers()
  fs: Drop osync_buffers_list()
  kvm: Use private inode list instead of i_private_list
  fs: Remove i_private_data
  aio: Stop using i_private_data and i_private_lock
  hugetlbfs: Stop using i_private_data
  fs: Stop using i_private_data for metadata bh tracking
  ...

23 files changed:
1  2 
fs/affs/amigaffs.c
fs/affs/file.c
fs/affs/inode.c
fs/affs/namei.c
fs/bfs/dir.c
fs/ext2/inode.c
fs/ext4/ext4.h
fs/ext4/fsync.c
fs/ext4/inode.c
fs/ext4/super.c
fs/inode.c
fs/libfs.c
fs/minix/inode.c
fs/ntfs3/inode.c
fs/udf/directory.c
fs/udf/file.c
fs/udf/inode.c
fs/udf/namei.c
fs/udf/super.c
fs/ufs/dir.c
fs/ufs/inode.c
include/linux/fs.h
mm/hugetlb.c

diff --cc fs/affs/amigaffs.c
Simple merge
diff --cc fs/affs/file.c
Simple merge
diff --cc fs/affs/inode.c
Simple merge
diff --cc fs/affs/namei.c
Simple merge
diff --cc fs/bfs/dir.c
Simple merge
diff --cc fs/ext2/inode.c
Simple merge
diff --cc fs/ext4/ext4.h
Simple merge
diff --cc fs/ext4/fsync.c
index bd8f230fa507e77c88096930d1fc634bd749133c,aa80af2b4eea28cd7ead30d2ebbe16067a40be09..924726dcc85ff57cf4906e6cb48095ce7f6fa184
@@@ -83,23 -83,12 +83,24 @@@ static int ext4_fsync_nojournal(struct 
                                int datasync, bool *needs_barrier)
  {
        struct inode *inode = file->f_inode;
 +      struct writeback_control wbc = {
 +              .sync_mode = WB_SYNC_ALL,
 +              .nr_to_write = 0,
 +      };
        int ret;
  
-       ret = generic_buffers_fsync_noflush(file, start, end, datasync);
+       ret = mmb_fsync_noflush(file, &EXT4_I(inode)->i_metadata_bhs,
+                               start, end, datasync);
 -      if (!ret)
 -              ret = ext4_sync_parent(inode);
 +      if (ret)
 +              return ret;
 +
 +      /* Force writeout of inode table buffer to disk */
 +      ret = ext4_write_inode(inode, &wbc);
 +      if (ret)
 +              return ret;
 +
 +      ret = ext4_sync_parent(inode);
 +
        if (test_opt(inode->i_sb, BARRIER))
                *needs_barrier = true;
  
diff --cc fs/ext4/inode.c
index 01679d96cd0f69caaad8c481b013c6fef677126c,c9fd1d17b492ed1f599573aded3adfd375c235c7..f78cf5f238358263aa33f8e2598b150b56d68729
@@@ -186,16 -184,10 +186,18 @@@ void ext4_evict_inode(struct inode *ino
        if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
                ext4_evict_ea_inode(inode);
        if (inode->i_nlink) {
 +              /*
 +               * If there's dirty page will lead to data loss, user
 +               * could see stale data.
 +               */
 +              if (unlikely(!ext4_emergency_state(inode->i_sb) &&
 +                  mapping_tagged(&inode->i_data, PAGECACHE_TAG_DIRTY)))
 +                      ext4_warning_inode(inode, "data will be lost");
 +
                truncate_inode_pages_final(&inode->i_data);
+               /* Avoid mballoc special inode which has no proper iops */
+               if (!EXT4_SB(inode->i_sb)->s_journal)
+                       mmb_sync(&EXT4_I(inode)->i_metadata_bhs);
                goto no_delete;
        }
  
diff --cc fs/ext4/super.c
index 8ca399c15970c44f6e7879cba8bd167fccad39bc,31f787a65fac85b7b82493be369f343b06df57d2..578508eb4f1a9c451e70d8b34f2bf14dddb2e056
@@@ -1520,30 -1525,10 +1521,31 @@@ static void destroy_inodecache(void
  void ext4_clear_inode(struct inode *inode)
  {
        ext4_fc_del(inode);
-       invalidate_inode_buffers(inode);
+       if (!EXT4_SB(inode->i_sb)->s_journal)
+               mmb_invalidate(&EXT4_I(inode)->i_metadata_bhs);
        clear_inode(inode);
        ext4_discard_preallocations(inode);
 +      /*
 +       * We must remove the inode from the hash before ext4_free_inode()
 +       * clears the bit in inode bitmap as otherwise another process reusing
 +       * the inode will block in insert_inode_hash() waiting for inode
 +       * eviction to complete while holding transaction handle open, but
 +       * ext4_evict_inode() still running for that inode could block waiting
 +       * for transaction commit if the inode is marked as IS_SYNC => deadlock.
 +       *
 +       * Removing the inode from the hash here is safe. There are two cases
 +       * to consider:
 +       * 1) The inode still has references to it (i_nlink > 0). In that case
 +       * we are keeping the inode and once we remove the inode from the hash,
 +       * iget() can create the new inode structure for the same inode number
 +       * and we are fine with that as all IO on behalf of the inode is
 +       * finished.
 +       * 2) We are deleting the inode (i_nlink == 0). In that case inode
 +       * number cannot be reused until ext4_free_inode() clears the bit in
 +       * the inode bitmap, at which point all IO is done and reuse is fine
 +       * again.
 +       */
 +      remove_inode_hash(inode);
        ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
        dquot_drop(inode);
        if (EXT4_I(inode)->jinode) {
diff --cc fs/inode.c
Simple merge
diff --cc fs/libfs.c
Simple merge
diff --cc fs/minix/inode.c
Simple merge
diff --cc fs/ntfs3/inode.c
Simple merge
diff --cc fs/udf/directory.c
Simple merge
diff --cc fs/udf/file.c
Simple merge
diff --cc fs/udf/inode.c
Simple merge
diff --cc fs/udf/namei.c
Simple merge
diff --cc fs/udf/super.c
Simple merge
diff --cc fs/ufs/dir.c
Simple merge
diff --cc fs/ufs/inode.c
Simple merge
diff --cc include/linux/fs.h
Simple merge
diff --cc mm/hugetlb.c
Simple merge