]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/6.6.26/btrfs-ensure-fiemap-doesn-t-race-with-writes-when-fi.patch
Linux 6.1.85
[thirdparty/kernel/stable-queue.git] / releases / 6.6.26 / btrfs-ensure-fiemap-doesn-t-race-with-writes-when-fi.patch
1 From 0d068aa2fa62b37eea4d8c2ab6e0fe3fc2d56cdb Mon Sep 17 00:00:00 2001
2 From: Sasha Levin <sashal@kernel.org>
3 Date: Thu, 22 Feb 2024 12:29:34 +0000
4 Subject: btrfs: ensure fiemap doesn't race with writes when FIEMAP_FLAG_SYNC
5 is given
6
7 From: Filipe Manana <fdmanana@suse.com>
8
9 [ Upstream commit 418b09027743d9a9fb39116bed46a192f868a3c3 ]
10
11 When FIEMAP_FLAG_SYNC is given to fiemap the expectation is that there
12 are no concurrent writes and we get a stable view of the inode's extent
13 layout.
14
15 When the flag is given we flush all IO (and wait for ordered extents to
16 complete) and then lock the inode in shared mode, however that leaves open
17 the possibility that a write might happen right after the flushing and
18 before locking the inode. So fix this by flushing again after locking the
19 inode - we leave the initial flushing before locking the inode to avoid
20 holding the lock and blocking other RO operations while waiting for IO
21 and ordered extents to complete. The second flushing while holding the
22 inode's lock will most of the time do nothing or very little since the
23 time window for new writes to have happened is small.
24
25 Reviewed-by: Josef Bacik <josef@toxicpanda.com>
26 Signed-off-by: Filipe Manana <fdmanana@suse.com>
27 Signed-off-by: David Sterba <dsterba@suse.com>
28 Stable-dep-of: 978b63f7464a ("btrfs: fix race when detecting delalloc ranges during fiemap")
29 Signed-off-by: Sasha Levin <sashal@kernel.org>
30 ---
31 fs/btrfs/extent_io.c | 21 ++++++++-------------
32 fs/btrfs/inode.c | 22 +++++++++++++++++++++-
33 2 files changed, 29 insertions(+), 14 deletions(-)
34
35 diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
36 index fc8eb8d86ca25..45d427c3033d7 100644
37 --- a/fs/btrfs/extent_io.c
38 +++ b/fs/btrfs/extent_io.c
39 @@ -2953,17 +2953,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
40 range_end = round_up(start + len, sectorsize);
41 prev_extent_end = range_start;
42
43 - btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
44 -
45 ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
46 if (ret < 0)
47 - goto out_unlock;
48 + goto out;
49 btrfs_release_path(path);
50
51 path->reada = READA_FORWARD;
52 ret = fiemap_search_slot(inode, path, range_start);
53 if (ret < 0) {
54 - goto out_unlock;
55 + goto out;
56 } else if (ret > 0) {
57 /*
58 * No file extent item found, but we may have delalloc between
59 @@ -3010,7 +3008,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
60 backref_ctx, 0, 0, 0,
61 prev_extent_end, hole_end);
62 if (ret < 0) {
63 - goto out_unlock;
64 + goto out;
65 } else if (ret > 0) {
66 /* fiemap_fill_next_extent() told us to stop. */
67 stopped = true;
68 @@ -3066,7 +3064,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
69 extent_gen,
70 backref_ctx);
71 if (ret < 0)
72 - goto out_unlock;
73 + goto out;
74 else if (ret > 0)
75 flags |= FIEMAP_EXTENT_SHARED;
76 }
77 @@ -3077,7 +3075,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
78 }
79
80 if (ret < 0) {
81 - goto out_unlock;
82 + goto out;
83 } else if (ret > 0) {
84 /* fiemap_fill_next_extent() told us to stop. */
85 stopped = true;
86 @@ -3088,12 +3086,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
87 next_item:
88 if (fatal_signal_pending(current)) {
89 ret = -EINTR;
90 - goto out_unlock;
91 + goto out;
92 }
93
94 ret = fiemap_next_leaf_item(inode, path);
95 if (ret < 0) {
96 - goto out_unlock;
97 + goto out;
98 } else if (ret > 0) {
99 /* No more file extent items for this inode. */
100 break;
101 @@ -3117,7 +3115,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
102 &delalloc_cached_state, backref_ctx,
103 0, 0, 0, prev_extent_end, range_end - 1);
104 if (ret < 0)
105 - goto out_unlock;
106 + goto out;
107 prev_extent_end = range_end;
108 }
109
110 @@ -3155,9 +3153,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
111 }
112
113 ret = emit_last_fiemap_cache(fieinfo, &cache);
114 -
115 -out_unlock:
116 - btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
117 out:
118 free_extent_state(delalloc_cached_state);
119 btrfs_free_backref_share_ctx(backref_ctx);
120 diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
121 index ca79c2b8adc46..1ac14223ffb50 100644
122 --- a/fs/btrfs/inode.c
123 +++ b/fs/btrfs/inode.c
124 @@ -7813,6 +7813,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
125 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
126 u64 start, u64 len)
127 {
128 + struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
129 int ret;
130
131 ret = fiemap_prep(inode, fieinfo, start, &len, 0);
132 @@ -7838,7 +7839,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
133 return ret;
134 }
135
136 - return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
137 + btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED);
138 +
139 + /*
140 + * We did an initial flush to avoid holding the inode's lock while
141 + * triggering writeback and waiting for the completion of IO and ordered
142 + * extents. Now after we locked the inode we do it again, because it's
143 + * possible a new write may have happened in between those two steps.
144 + */
145 + if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
146 + ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX);
147 + if (ret) {
148 + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
149 + return ret;
150 + }
151 + }
152 +
153 + ret = extent_fiemap(btrfs_inode, fieinfo, start, len);
154 + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
155 +
156 + return ret;
157 }
158
159 static int btrfs_writepages(struct address_space *mapping,
160 --
161 2.43.0
162