]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - queue-4.19/btrfs-send-flush-dellaloc-in-order-to-avoid-data-loss.patch
bee847a079852f40950e4bb1abe8ecbce1c12a72
[thirdparty/kernel/stable-queue.git] / queue-4.19 / btrfs-send-flush-dellaloc-in-order-to-avoid-data-loss.patch
1 From 9f89d5de8631c7930898a601b6612e271aa2261c Mon Sep 17 00:00:00 2001
2 From: Filipe Manana <fdmanana@suse.com>
3 Date: Mon, 15 Apr 2019 09:29:36 +0100
4 Subject: Btrfs: send, flush dellaloc in order to avoid data loss
5
6 From: Filipe Manana <fdmanana@suse.com>
7
8 commit 9f89d5de8631c7930898a601b6612e271aa2261c upstream.
9
10 When we set a subvolume to read-only mode we do not flush delalloc for any
11 of its inodes (except if the filesystem is mounted with -o flushoncommit),
12 since it does not affect correctness for any subsequent operations - except
13 for a future send operation. The send operation will not be able to see the
14 delalloc data since the respective file extent items, inode item updates,
15 backreferences, etc., have not yet hit the subvolume and extent trees.
16
17 Effectively this means data loss, since the send stream will not contain
18 any data from existing delalloc. Another problem from this is that if the
19 writeback starts and finishes while the send operation is in progress, we
20 have the subvolume tree being modified concurrently, which can result
21 in send failing unexpectedly with EIO or hitting runtime errors, assertion
22 failures or hitting BUG_ONs, etc.
23
24 Simple reproducer:
25
26 $ mkfs.btrfs -f /dev/sdb
27 $ mount /dev/sdb /mnt
28
29 $ btrfs subvolume create /mnt/sv
30 $ xfs_io -f -c "pwrite -S 0xea 0 108K" /mnt/sv/foo
31
32 $ btrfs property set /mnt/sv ro true
33 $ btrfs send -f /tmp/send.stream /mnt/sv
34
35 $ od -t x1 -A d /mnt/sv/foo
36 0000000 ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea
37 *
38 0110592
39
40 $ umount /mnt
41 $ mkfs.btrfs -f /dev/sdc
42 $ mount /dev/sdc /mnt
43
44 $ btrfs receive -f /tmp/send.stream /mnt
45 $ echo $?
46 0
47 $ od -t x1 -A d /mnt/sv/foo
48 0000000
49 # ---> empty file
50
51 Since this is a problem that affects send only, fix it in send by flushing
52 delalloc for all the roots used by the send operation before send starts
53 to process the commit roots.
54
55 This is a problem that affects send since it was introduced (commit
56 31db9f7c23fbf7 ("Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive"))
57 but backporting it to older kernels has some dependencies:
58
59 - For kernels between 3.19 and 4.20, it depends on commit 3cd24c698004d2
60 ("btrfs: use tagged writepage to mitigate livelock of snapshot") because
61 the function btrfs_start_delalloc_snapshot() does not exist before that
62 commit. So one has to either pick that commit or replace the calls to
63 btrfs_start_delalloc_snapshot() in this patch with calls to
64 btrfs_start_delalloc_inodes().
65
66 - For kernels older than 3.19 it also requires commit e5fa8f865b3324
67 ("Btrfs: ensure send always works on roots without orphans") because
68 it depends on the function ensure_commit_roots_uptodate() which that
69 commit introduced.
70
71 - No dependencies for 5.0+ kernels.
72
73 A test case for fstests follows soon.
74
75 CC: stable@vger.kernel.org # 3.19+
76 Signed-off-by: Filipe Manana <fdmanana@suse.com>
77 Signed-off-by: David Sterba <dsterba@suse.com>
78 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
79
80 ---
81 fs/btrfs/send.c | 36 ++++++++++++++++++++++++++++++++++++
82 1 file changed, 36 insertions(+)
83
84 --- a/fs/btrfs/send.c
85 +++ b/fs/btrfs/send.c
86 @@ -6583,6 +6583,38 @@ commit_trans:
87 return btrfs_commit_transaction(trans);
88 }
89
90 +/*
91 + * Make sure any existing delalloc is flushed for any root used by a send
92 + * operation so that we do not miss any data and we do not race with writeback
93 + * finishing and changing a tree while send is using the tree. This could
94 + * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
95 + * a send operation then uses the subvolume.
96 + * After flushing delalloc ensure_commit_roots_uptodate() must be called.
97 + */
98 +static int flush_delalloc_roots(struct send_ctx *sctx)
99 +{
100 + struct btrfs_root *root = sctx->parent_root;
101 + int ret;
102 + int i;
103 +
104 + if (root) {
105 + ret = btrfs_start_delalloc_snapshot(root);
106 + if (ret)
107 + return ret;
108 + btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
109 + }
110 +
111 + for (i = 0; i < sctx->clone_roots_cnt; i++) {
112 + root = sctx->clone_roots[i].root;
113 + ret = btrfs_start_delalloc_snapshot(root);
114 + if (ret)
115 + return ret;
116 + btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
117 + }
118 +
119 + return 0;
120 +}
121 +
122 static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
123 {
124 spin_lock(&root->root_item_lock);
125 @@ -6807,6 +6839,10 @@ long btrfs_ioctl_send(struct file *mnt_f
126 NULL);
127 sort_clone_roots = 1;
128
129 + ret = flush_delalloc_roots(sctx);
130 + if (ret)
131 + goto out;
132 +
133 ret = ensure_commit_roots_uptodate(sctx);
134 if (ret)
135 goto out;