]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
xfs: reserve blocks for truncating large realtime inode
author: Zhang Yi <yi.zhang@huawei.com>
Tue, 18 Jun 2024 14:21:11 +0000 (22:21 +0800)
committer: Christian Brauner <brauner@kernel.org>
Wed, 19 Jun 2024 13:58:28 +0000 (15:58 +0200)
When truncating a big realtime file down to an unaligned size,
xfs_truncate_page() only zeros out the tail of the EOF block.
__xfs_bunmapi() should then split the tail written extent and convert the
part that lies beyond the EOF block to unwritten, but this currently does
not work as expected because the block reservation is zero in
xfs_setattr_size(). This can expose stale data since commit
943bc0882ceb ("iomap: don't increase i_size if it's not a write
operation").

If we truncate a file that contains a large enough written extent:

     |<    rtext    >|<    rtext    >|
  ...WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW
        ^ (new EOF)      ^ old EOF

Since we only zero out the tail of the EOF block, and
xfs_itruncate_extents()->..->__xfs_bunmapi() unmaps only the whole aligned
extents, the file ends up in this state:

     |<    rtext    >|
  ...WWWzWWWWWWWWWWWWW
        ^ new EOF

Then, if we do an extending write like this, the blocks in the previous
tail extent become stale:

     |<    rtext    >|
  ...WWWzSSSSSSSSSSSSS..........WWWWWWWWWWWWWWWWW
        ^ old EOF               ^ append start  ^ new EOF

Fix this by reserving XFS_DIOSTRAT_SPACE_RES blocks when truncating big
realtime inodes (those whose rtextsize is larger than one block).

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://lore.kernel.org/r/20240618142112.1315279-2-yi.zhang@huaweicloud.com
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/xfs/xfs_iops.c

index ff222827e55087a1aa366923c9832a3fbb029cd8..a00dcbc77e12b236dc30fe60c801d4e41f62f2c2 100644 (file)
@@ -17,6 +17,8 @@
 #include "xfs_da_btree.h"
 #include "xfs_attr.h"
 #include "xfs_trans.h"
+#include "xfs_trans_space.h"
+#include "xfs_bmap_btree.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_symlink.h"
@@ -811,6 +813,7 @@ xfs_setattr_size(
        struct xfs_trans        *tp;
        int                     error;
        uint                    lock_flags = 0;
+       uint                    resblks = 0;
        bool                    did_zeroing = false;
 
        xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
@@ -917,7 +920,17 @@ xfs_setattr_size(
                        return error;
        }
 
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+       /*
+        * For realtime inode with more than one block rtextsize, we need the
+        * block reservation for bmap btree block allocations/splits that can
+        * happen since it could split the tail written extent and convert the
+        * right beyond EOF one to unwritten.
+        */
+       if (xfs_inode_has_bigrtalloc(ip))
+               resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
+                               0, 0, &tp);
        if (error)
                return error;