git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/4.9.132/dm-thin-metadata-try-to-avoid-ever-aborting-transactions.patch
Linux 4.14.95
[thirdparty/kernel/stable-queue.git] / releases / 4.9.132 / dm-thin-metadata-try-to-avoid-ever-aborting-transactions.patch
1 From foo@baz Mon Oct 8 18:01:43 CEST 2018
2 From: Joe Thornber <ejt@redhat.com>
3 Date: Mon, 10 Sep 2018 16:50:09 +0100
4 Subject: dm thin metadata: try to avoid ever aborting transactions
5
6 From: Joe Thornber <ejt@redhat.com>
7
8 [ Upstream commit 3ab91828166895600efd9cdc3a0eb32001f7204a ]
9
10 Committing a transaction can consume some metadata of its own, we now
11 reserve a small amount of metadata to cover this. Free metadata
12 reported by the kernel will not include this reserve.
13
14 If any of the reserve has been used after a commit we enter a new
15 internal state PM_OUT_OF_METADATA_SPACE. This is reported as
16 PM_READ_ONLY, so no userland changes are needed. If the metadata
17 device is resized the pool will move back to PM_WRITE.
18
19 These changes mean we never need to abort and rollback a transaction due
20 to running out of metadata space. This is particularly important
21 because there have been a handful of reports of data corruption against
22 DM thin-provisioning that can all be attributed to the thin-pool having
23 run out of metadata space.
24
25 Signed-off-by: Joe Thornber <ejt@redhat.com>
26 Signed-off-by: Mike Snitzer <snitzer@redhat.com>
27 Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
28 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
29 ---
30 drivers/md/dm-thin-metadata.c | 36 ++++++++++++++++++++
31 drivers/md/dm-thin.c | 73 +++++++++++++++++++++++++++++++++++++-----
32 2 files changed, 100 insertions(+), 9 deletions(-)
33
34 --- a/drivers/md/dm-thin-metadata.c
35 +++ b/drivers/md/dm-thin-metadata.c
36 @@ -190,6 +190,12 @@ struct dm_pool_metadata {
37 sector_t data_block_size;
38
39 /*
40 + * We reserve a section of the metadata for commit overhead.
41 + * All reported space does *not* include this.
42 + */
43 + dm_block_t metadata_reserve;
44 +
45 + /*
46 * Set if a transaction has to be aborted but the attempt to roll back
47 * to the previous (good) transaction failed. The only pool metadata
48 * operation possible in this state is the closing of the device.
49 @@ -827,6 +833,22 @@ static int __commit_transaction(struct d
50 return dm_tm_commit(pmd->tm, sblock);
51 }
52
53 +static void __set_metadata_reserve(struct dm_pool_metadata *pmd)
54 +{
55 + int r;
56 + dm_block_t total;
57 + dm_block_t max_blocks = 4096; /* 16M */
58 +
59 + r = dm_sm_get_nr_blocks(pmd->metadata_sm, &total);
60 + if (r) {
61 + DMERR("could not get size of metadata device");
62 + pmd->metadata_reserve = max_blocks;
63 + } else {
64 + sector_div(total, 10);
65 + pmd->metadata_reserve = min(max_blocks, total);
66 + }
67 +}
68 +
69 struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
70 sector_t data_block_size,
71 bool format_device)
72 @@ -860,6 +882,8 @@ struct dm_pool_metadata *dm_pool_metadat
73 return ERR_PTR(r);
74 }
75
76 + __set_metadata_reserve(pmd);
77 +
78 return pmd;
79 }
80
81 @@ -1831,6 +1855,13 @@ int dm_pool_get_free_metadata_block_coun
82 down_read(&pmd->root_lock);
83 if (!pmd->fail_io)
84 r = dm_sm_get_nr_free(pmd->metadata_sm, result);
85 +
86 + if (!r) {
87 + if (*result < pmd->metadata_reserve)
88 + *result = 0;
89 + else
90 + *result -= pmd->metadata_reserve;
91 + }
92 up_read(&pmd->root_lock);
93
94 return r;
95 @@ -1943,8 +1974,11 @@ int dm_pool_resize_metadata_dev(struct d
96 int r = -EINVAL;
97
98 down_write(&pmd->root_lock);
99 - if (!pmd->fail_io)
100 + if (!pmd->fail_io) {
101 r = __resize_space_map(pmd->metadata_sm, new_count);
102 + if (!r)
103 + __set_metadata_reserve(pmd);
104 + }
105 up_write(&pmd->root_lock);
106
107 return r;
108 --- a/drivers/md/dm-thin.c
109 +++ b/drivers/md/dm-thin.c
110 @@ -200,7 +200,13 @@ struct dm_thin_new_mapping;
111 enum pool_mode {
112 PM_WRITE, /* metadata may be changed */
113 PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */
114 +
115 + /*
116 + * Like READ_ONLY, except may switch back to WRITE on metadata resize. Reported as READ_ONLY.
117 + */
118 + PM_OUT_OF_METADATA_SPACE,
119 PM_READ_ONLY, /* metadata may not be changed */
120 +
121 PM_FAIL, /* all I/O fails */
122 };
123
124 @@ -1386,7 +1392,35 @@ static void set_pool_mode(struct pool *p
125
126 static void requeue_bios(struct pool *pool);
127
128 -static void check_for_space(struct pool *pool)
129 +static bool is_read_only_pool_mode(enum pool_mode mode)
130 +{
131 + return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY);
132 +}
133 +
134 +static bool is_read_only(struct pool *pool)
135 +{
136 + return is_read_only_pool_mode(get_pool_mode(pool));
137 +}
138 +
139 +static void check_for_metadata_space(struct pool *pool)
140 +{
141 + int r;
142 + const char *ooms_reason = NULL;
143 + dm_block_t nr_free;
144 +
145 + r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free);
146 + if (r)
147 + ooms_reason = "Could not get free metadata blocks";
148 + else if (!nr_free)
149 + ooms_reason = "No free metadata blocks";
150 +
151 + if (ooms_reason && !is_read_only(pool)) {
152 + DMERR("%s", ooms_reason);
153 + set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE);
154 + }
155 +}
156 +
157 +static void check_for_data_space(struct pool *pool)
158 {
159 int r;
160 dm_block_t nr_free;
161 @@ -1412,14 +1446,16 @@ static int commit(struct pool *pool)
162 {
163 int r;
164
165 - if (get_pool_mode(pool) >= PM_READ_ONLY)
166 + if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE)
167 return -EINVAL;
168
169 r = dm_pool_commit_metadata(pool->pmd);
170 if (r)
171 metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
172 - else
173 - check_for_space(pool);
174 + else {
175 + check_for_metadata_space(pool);
176 + check_for_data_space(pool);
177 + }
178
179 return r;
180 }
181 @@ -1485,6 +1521,19 @@ static int alloc_data_block(struct thin_
182 return r;
183 }
184
185 + r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks);
186 + if (r) {
187 + metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r);
188 + return r;
189 + }
190 +
191 + if (!free_blocks) {
192 + /* Let's commit before we use up the metadata reserve. */
193 + r = commit(pool);
194 + if (r)
195 + return r;
196 + }
197 +
198 return 0;
199 }
200
201 @@ -1516,6 +1565,7 @@ static int should_error_unserviceable_bi
202 case PM_OUT_OF_DATA_SPACE:
203 return pool->pf.error_if_no_space ? -ENOSPC : 0;
204
205 + case PM_OUT_OF_METADATA_SPACE:
206 case PM_READ_ONLY:
207 case PM_FAIL:
208 return -EIO;
209 @@ -2479,8 +2529,9 @@ static void set_pool_mode(struct pool *p
210 error_retry_list(pool);
211 break;
212
213 + case PM_OUT_OF_METADATA_SPACE:
214 case PM_READ_ONLY:
215 - if (old_mode != new_mode)
216 + if (!is_read_only_pool_mode(old_mode))
217 notify_of_pool_mode_change(pool, "read-only");
218 dm_pool_metadata_read_only(pool->pmd);
219 pool->process_bio = process_bio_read_only;
220 @@ -3418,6 +3469,10 @@ static int maybe_resize_metadata_dev(str
221 DMINFO("%s: growing the metadata device from %llu to %llu blocks",
222 dm_device_name(pool->pool_md),
223 sb_metadata_dev_size, metadata_dev_size);
224 +
225 + if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE)
226 + set_pool_mode(pool, PM_WRITE);
227 +
228 r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
229 if (r) {
230 metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r);
231 @@ -3721,7 +3776,7 @@ static int pool_message(struct dm_target
232 struct pool_c *pt = ti->private;
233 struct pool *pool = pt->pool;
234
235 - if (get_pool_mode(pool) >= PM_READ_ONLY) {
236 + if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) {
237 DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
238 dm_device_name(pool->pool_md));
239 return -EOPNOTSUPP;
240 @@ -3795,6 +3850,7 @@ static void pool_status(struct dm_target
241 dm_block_t nr_blocks_data;
242 dm_block_t nr_blocks_metadata;
243 dm_block_t held_root;
244 + enum pool_mode mode;
245 char buf[BDEVNAME_SIZE];
246 char buf2[BDEVNAME_SIZE];
247 struct pool_c *pt = ti->private;
248 @@ -3865,9 +3921,10 @@ static void pool_status(struct dm_target
249 else
250 DMEMIT("- ");
251
252 - if (pool->pf.mode == PM_OUT_OF_DATA_SPACE)
253 + mode = get_pool_mode(pool);
254 + if (mode == PM_OUT_OF_DATA_SPACE)
255 DMEMIT("out_of_data_space ");
256 - else if (pool->pf.mode == PM_READ_ONLY)
257 + else if (is_read_only_pool_mode(mode))
258 DMEMIT("ro ");
259 else
260 DMEMIT("rw ");