From: Linus Torvalds Date: Mon, 13 Apr 2026 19:46:42 +0000 (-0700) Subject: Merge tag 'vfs-7.1-rc1.bh.metadata' of git://git.kernel.org/pub/scm/linux/kernel... X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fc825e513cd494cfcbeb47acf5738fe64f3a9051;p=thirdparty%2Fkernel%2Flinux.git Merge tag 'vfs-7.1-rc1.bh.metadata' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs Pull vfs buffer_head updates from Christian Brauner: "This cleans up the mess that has accumulated over the years in metadata buffer_head tracking for inodes. It moves the tracking into a dedicated structure in the filesystem-private part of the inode (so that we don't use private_list, private_data, and private_lock in struct address_space), and also moves a couple of other users of private_data and private_list so that these are removed from struct address_space, saving 3 longs in struct inode for 99% of inodes" * tag 'vfs-7.1-rc1.bh.metadata' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (42 commits) fs: Drop i_private_list from address_space fs: Drop mapping_metadata_bhs from address space ext4: Track metadata bhs in fs-private inode part minix: Track metadata bhs in fs-private inode part udf: Track metadata bhs in fs-private inode part fat: Track metadata bhs in fs-private inode part bfs: Track metadata bhs in fs-private inode part affs: Track metadata bhs in fs-private inode part ext2: Track metadata bhs in fs-private inode part fs: Provide functions for handling mapping_metadata_bhs directly fs: Switch inode_has_buffers() to take mapping_metadata_bhs fs: Make bhs point to mapping_metadata_bhs fs: Move metadata bhs tracking to a separate struct fs: Fold fsync_buffers_list() into sync_mapping_buffers() fs: Drop osync_buffers_list() kvm: Use private inode list instead of i_private_list fs: Remove i_private_data aio: Stop using i_private_data and i_private_lock hugetlbfs: Stop using i_private_data fs: Stop using i_private_data for metadata bh tracking ... 
--- fc825e513cd494cfcbeb47acf5738fe64f3a9051 diff --cc fs/ext4/fsync.c index bd8f230fa507e,aa80af2b4eea2..924726dcc85ff --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@@ -83,23 -83,12 +83,24 @@@ static int ext4_fsync_nojournal(struct int datasync, bool *needs_barrier) { struct inode *inode = file->f_inode; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, + }; int ret; - ret = generic_buffers_fsync_noflush(file, start, end, datasync); + ret = mmb_fsync_noflush(file, &EXT4_I(inode)->i_metadata_bhs, + start, end, datasync); - if (!ret) - ret = ext4_sync_parent(inode); + if (ret) + return ret; + + /* Force writeout of inode table buffer to disk */ + ret = ext4_write_inode(inode, &wbc); + if (ret) + return ret; + + ret = ext4_sync_parent(inode); + if (test_opt(inode->i_sb, BARRIER)) *needs_barrier = true; diff --cc fs/ext4/inode.c index 01679d96cd0f6,c9fd1d17b492e..f78cf5f238358 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@@ -186,16 -184,10 +186,18 @@@ void ext4_evict_inode(struct inode *ino if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) ext4_evict_ea_inode(inode); if (inode->i_nlink) { + /* + * If there's dirty page will lead to data loss, user + * could see stale data. 
+ */ + if (unlikely(!ext4_emergency_state(inode->i_sb) && + mapping_tagged(&inode->i_data, PAGECACHE_TAG_DIRTY))) + ext4_warning_inode(inode, "data will be lost"); + truncate_inode_pages_final(&inode->i_data); - + /* Avoid mballoc special inode which has no proper iops */ + if (!EXT4_SB(inode->i_sb)->s_journal) + mmb_sync(&EXT4_I(inode)->i_metadata_bhs); goto no_delete; } diff --cc fs/ext4/super.c index 8ca399c15970c,31f787a65fac8..578508eb4f1a9 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@@ -1520,30 -1525,10 +1521,31 @@@ static void destroy_inodecache(void void ext4_clear_inode(struct inode *inode) { ext4_fc_del(inode); - invalidate_inode_buffers(inode); + if (!EXT4_SB(inode->i_sb)->s_journal) + mmb_invalidate(&EXT4_I(inode)->i_metadata_bhs); clear_inode(inode); ext4_discard_preallocations(inode); + /* + * We must remove the inode from the hash before ext4_free_inode() + * clears the bit in inode bitmap as otherwise another process reusing + * the inode will block in insert_inode_hash() waiting for inode + * eviction to complete while holding transaction handle open, but + * ext4_evict_inode() still running for that inode could block waiting + * for transaction commit if the inode is marked as IS_SYNC => deadlock. + * + * Removing the inode from the hash here is safe. There are two cases + * to consider: + * 1) The inode still has references to it (i_nlink > 0). In that case + * we are keeping the inode and once we remove the inode from the hash, + * iget() can create the new inode structure for the same inode number + * and we are fine with that as all IO on behalf of the inode is + * finished. + * 2) We are deleting the inode (i_nlink == 0). In that case inode + * number cannot be reused until ext4_free_inode() clears the bit in + * the inode bitmap, at which point all IO is done and reuse is fine + * again. + */ + remove_inode_hash(inode); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); dquot_drop(inode); if (EXT4_I(inode)->jinode) {