]>
Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "bcachefs.h" | |
4 | #include "checksum.h" | |
065bd335 | 5 | #include "counters.h" |
1c6fdbd8 | 6 | #include "disk_groups.h" |
cd575ddf | 7 | #include "ec.h" |
1c6fdbd8 | 8 | #include "error.h" |
1c6fdbd8 | 9 | #include "journal.h" |
25be2e5d | 10 | #include "journal_sb.h" |
1dd7f9d9 | 11 | #include "journal_seq_blacklist.h" |
065bd335 | 12 | #include "recovery.h" |
1c6fdbd8 KO |
13 | #include "replicas.h" |
14 | #include "quota.h" | |
a37ad1a3 | 15 | #include "sb-clean.h" |
f5d26fa3 | 16 | #include "sb-errors.h" |
fb8e5b4c | 17 | #include "sb-members.h" |
1c6fdbd8 KO |
18 | #include "super-io.h" |
19 | #include "super.h" | |
4254f5bf | 20 | #include "trace.h" |
1c6fdbd8 KO |
21 | #include "vstructs.h" |
22 | ||
23 | #include <linux/backing-dev.h> | |
24 | #include <linux/sort.h> | |
25 | ||
26 | static const struct blk_holder_ops bch2_sb_handle_bdev_ops = { | |
27 | }; | |
28 | ||
065bd335 | 29 | struct bch2_metadata_version { |
ba8eeae8 KO |
30 | u16 version; |
31 | const char *name; | |
065bd335 | 32 | u64 recovery_passes; |
ba8eeae8 KO |
33 | }; |
34 | ||
065bd335 KO |
35 | static const struct bch2_metadata_version bch2_metadata_versions[] = { |
36 | #define x(n, v, _recovery_passes) { \ | |
37 | .version = v, \ | |
38 | .name = #n, \ | |
39 | .recovery_passes = _recovery_passes, \ | |
40 | }, | |
e3804b55 KO |
41 | BCH_METADATA_VERSIONS() |
42 | #undef x | |
43 | }; | |
44 | ||
45 | void bch2_version_to_text(struct printbuf *out, unsigned v) | |
46 | { | |
ba8eeae8 KO |
47 | const char *str = "(unknown version)"; |
48 | ||
49 | for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++) | |
50 | if (bch2_metadata_versions[i].version == v) { | |
51 | str = bch2_metadata_versions[i].name; | |
52 | break; | |
53 | } | |
54 | ||
55 | prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str); | |
56 | } | |
57 | ||
58 | unsigned bch2_latest_compatible_version(unsigned v) | |
59 | { | |
60 | if (!BCH_VERSION_MAJOR(v)) | |
61 | return v; | |
62 | ||
63 | for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++) | |
64 | if (bch2_metadata_versions[i].version > v && | |
65 | BCH_VERSION_MAJOR(bch2_metadata_versions[i].version) == | |
66 | BCH_VERSION_MAJOR(v)) | |
67 | v = bch2_metadata_versions[i].version; | |
e3804b55 | 68 | |
ba8eeae8 | 69 | return v; |
e3804b55 KO |
70 | } |
71 | ||
065bd335 KO |
72 | u64 bch2_upgrade_recovery_passes(struct bch_fs *c, |
73 | unsigned old_version, | |
74 | unsigned new_version) | |
75 | { | |
76 | u64 ret = 0; | |
77 | ||
78 | for (const struct bch2_metadata_version *i = bch2_metadata_versions; | |
79 | i < bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions); | |
80 | i++) | |
81 | if (i->version > old_version && i->version <= new_version) { | |
82 | if (i->recovery_passes & RECOVERY_PASS_ALL_FSCK) | |
83 | ret |= bch2_fsck_recovery_passes(); | |
84 | ret |= i->recovery_passes; | |
85 | } | |
86 | ||
87 | return ret &= ~RECOVERY_PASS_ALL_FSCK; | |
88 | } | |
89 | ||
1c6fdbd8 KO |
90 | const char * const bch2_sb_fields[] = { |
91 | #define x(name, nr) #name, | |
92 | BCH_SB_FIELDS() | |
93 | #undef x | |
94 | NULL | |
95 | }; | |
96 | ||
efe68e1d KO |
97 | static int bch2_sb_field_validate(struct bch_sb *, struct bch_sb_field *, |
98 | struct printbuf *); | |
1c6fdbd8 | 99 | |
4637429e | 100 | struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *sb, |
1c6fdbd8 KO |
101 | enum bch_sb_field_type type) |
102 | { | |
103 | struct bch_sb_field *f; | |
104 | ||
105 | /* XXX: need locking around superblock to access optional fields */ | |
106 | ||
107 | vstruct_for_each(sb, f) | |
108 | if (le32_to_cpu(f->type) == type) | |
109 | return f; | |
110 | return NULL; | |
111 | } | |
112 | ||
113 | static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb, | |
114 | struct bch_sb_field *f, | |
115 | unsigned u64s) | |
116 | { | |
117 | unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0; | |
118 | unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s; | |
119 | ||
9d8022db | 120 | BUG_ON(__vstruct_bytes(struct bch_sb, sb_u64s) > sb->buffer_size); |
1c6fdbd8 | 121 | |
187c71f6 KO |
122 | if (!f && !u64s) { |
123 | /* nothing to do: */ | |
124 | } else if (!f) { | |
1c6fdbd8 KO |
125 | f = vstruct_last(sb->sb); |
126 | memset(f, 0, sizeof(u64) * u64s); | |
127 | f->u64s = cpu_to_le32(u64s); | |
128 | f->type = 0; | |
129 | } else { | |
130 | void *src, *dst; | |
131 | ||
132 | src = vstruct_end(f); | |
af9d3bc2 KO |
133 | |
134 | if (u64s) { | |
135 | f->u64s = cpu_to_le32(u64s); | |
136 | dst = vstruct_end(f); | |
137 | } else { | |
138 | dst = f; | |
139 | } | |
1c6fdbd8 KO |
140 | |
141 | memmove(dst, src, vstruct_end(sb->sb) - src); | |
142 | ||
143 | if (dst > src) | |
144 | memset(src, 0, dst - src); | |
145 | } | |
146 | ||
147 | sb->sb->u64s = cpu_to_le32(sb_u64s); | |
148 | ||
af9d3bc2 KO |
149 | return u64s ? f : NULL; |
150 | } | |
151 | ||
152 | void bch2_sb_field_delete(struct bch_sb_handle *sb, | |
153 | enum bch_sb_field_type type) | |
154 | { | |
4637429e | 155 | struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type); |
af9d3bc2 KO |
156 | |
157 | if (f) | |
158 | __bch2_sb_field_resize(sb, f, 0); | |
1c6fdbd8 KO |
159 | } |
160 | ||
161 | /* Superblock realloc/free: */ | |
162 | ||
163 | void bch2_free_super(struct bch_sb_handle *sb) | |
164 | { | |
3e3e02e6 | 165 | kfree(sb->bio); |
1c6fdbd8 KO |
166 | if (!IS_ERR_OR_NULL(sb->bdev)) |
167 | blkdev_put(sb->bdev, sb->holder); | |
168 | kfree(sb->holder); | |
63807d95 | 169 | kfree(sb->sb_name); |
1c6fdbd8 | 170 | |
9d8022db | 171 | kfree(sb->sb); |
1c6fdbd8 KO |
172 | memset(sb, 0, sizeof(*sb)); |
173 | } | |
174 | ||
175 | int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) | |
176 | { | |
177 | size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s); | |
9d8022db | 178 | size_t new_buffer_size; |
1c6fdbd8 KO |
179 | struct bch_sb *new_sb; |
180 | struct bio *bio; | |
181 | ||
9d8022db KO |
182 | if (sb->bdev) |
183 | new_bytes = max_t(size_t, new_bytes, bdev_logical_block_size(sb->bdev)); | |
184 | ||
185 | new_buffer_size = roundup_pow_of_two(new_bytes); | |
186 | ||
187 | if (sb->sb && sb->buffer_size >= new_buffer_size) | |
1c6fdbd8 KO |
188 | return 0; |
189 | ||
40a53b92 | 190 | if (sb->sb && sb->have_layout) { |
1c6fdbd8 KO |
191 | u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; |
192 | ||
193 | if (new_bytes > max_bytes) { | |
194 | pr_err("%pg: superblock too big: want %zu but have %llu", | |
195 | sb->bdev, new_bytes, max_bytes); | |
098ef98d | 196 | return -BCH_ERR_ENOSPC_sb; |
1c6fdbd8 KO |
197 | } |
198 | } | |
199 | ||
9d8022db | 200 | if (sb->buffer_size >= new_buffer_size && sb->sb) |
1c6fdbd8 KO |
201 | return 0; |
202 | ||
203 | if (dynamic_fault("bcachefs:add:super_realloc")) | |
65d48e35 | 204 | return -BCH_ERR_ENOMEM_sb_realloc_injected; |
1c6fdbd8 | 205 | |
6fe893ea KO |
206 | new_sb = krealloc(sb->sb, new_buffer_size, GFP_NOFS|__GFP_ZERO); |
207 | if (!new_sb) | |
208 | return -BCH_ERR_ENOMEM_sb_buf_realloc; | |
209 | ||
210 | sb->sb = new_sb; | |
211 | ||
1c6fdbd8 | 212 | if (sb->have_bio) { |
6fe893ea | 213 | unsigned nr_bvecs = buf_pages(sb->sb, new_buffer_size); |
1c6fdbd8 KO |
214 | |
215 | bio = bio_kmalloc(nr_bvecs, GFP_KERNEL); | |
216 | if (!bio) | |
65d48e35 | 217 | return -BCH_ERR_ENOMEM_sb_bio_realloc; |
1c6fdbd8 KO |
218 | |
219 | bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0); | |
220 | ||
3e3e02e6 | 221 | kfree(sb->bio); |
1c6fdbd8 KO |
222 | sb->bio = bio; |
223 | } | |
224 | ||
9d8022db | 225 | sb->buffer_size = new_buffer_size; |
1c6fdbd8 KO |
226 | |
227 | return 0; | |
228 | } | |
229 | ||
4637429e | 230 | struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, |
1c6fdbd8 KO |
231 | enum bch_sb_field_type type, |
232 | unsigned u64s) | |
233 | { | |
4637429e | 234 | struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type); |
1c6fdbd8 KO |
235 | ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0; |
236 | ssize_t d = -old_u64s + u64s; | |
237 | ||
238 | if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) | |
239 | return NULL; | |
240 | ||
241 | if (sb->fs_sb) { | |
242 | struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb); | |
243 | struct bch_dev *ca; | |
244 | unsigned i; | |
245 | ||
246 | lockdep_assert_held(&c->sb_lock); | |
247 | ||
248 | /* XXX: we're not checking that offline device have enough space */ | |
249 | ||
250 | for_each_online_member(ca, c, i) { | |
96dea3d5 | 251 | struct bch_sb_handle *dev_sb = &ca->disk_sb; |
1c6fdbd8 | 252 | |
96dea3d5 | 253 | if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) { |
1c6fdbd8 KO |
254 | percpu_ref_put(&ca->ref); |
255 | return NULL; | |
256 | } | |
257 | } | |
258 | } | |
259 | ||
4637429e | 260 | f = bch2_sb_field_get_id(sb->sb, type); |
1c6fdbd8 | 261 | f = __bch2_sb_field_resize(sb, f, u64s); |
af9d3bc2 KO |
262 | if (f) |
263 | f->type = cpu_to_le32(type); | |
1c6fdbd8 KO |
264 | return f; |
265 | } | |
266 | ||
267 | /* Superblock validate: */ | |
268 | ||
efe68e1d | 269 | static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out) |
1c6fdbd8 KO |
270 | { |
271 | u64 offset, prev_offset, max_sectors; | |
272 | unsigned i; | |
273 | ||
bf5a261c KO |
274 | BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512); |
275 | ||
1c6fdbd8 | 276 | if (!uuid_equal(&layout->magic, &BCACHE_MAGIC) && |
efe68e1d | 277 | !uuid_equal(&layout->magic, &BCHFS_MAGIC)) { |
401ec4db | 278 | prt_printf(out, "Not a bcachefs superblock layout"); |
78c0b75c | 279 | return -BCH_ERR_invalid_sb_layout; |
efe68e1d | 280 | } |
1c6fdbd8 | 281 | |
efe68e1d | 282 | if (layout->layout_type != 0) { |
401ec4db | 283 | prt_printf(out, "Invalid superblock layout type %u", |
efe68e1d | 284 | layout->layout_type); |
78c0b75c | 285 | return -BCH_ERR_invalid_sb_layout_type; |
efe68e1d | 286 | } |
1c6fdbd8 | 287 | |
efe68e1d | 288 | if (!layout->nr_superblocks) { |
401ec4db | 289 | prt_printf(out, "Invalid superblock layout: no superblocks"); |
78c0b75c | 290 | return -BCH_ERR_invalid_sb_layout_nr_superblocks; |
efe68e1d | 291 | } |
1c6fdbd8 | 292 | |
efe68e1d | 293 | if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) { |
401ec4db | 294 | prt_printf(out, "Invalid superblock layout: too many superblocks"); |
78c0b75c | 295 | return -BCH_ERR_invalid_sb_layout_nr_superblocks; |
efe68e1d | 296 | } |
1c6fdbd8 KO |
297 | |
298 | max_sectors = 1 << layout->sb_max_size_bits; | |
299 | ||
300 | prev_offset = le64_to_cpu(layout->sb_offset[0]); | |
301 | ||
302 | for (i = 1; i < layout->nr_superblocks; i++) { | |
303 | offset = le64_to_cpu(layout->sb_offset[i]); | |
304 | ||
efe68e1d | 305 | if (offset < prev_offset + max_sectors) { |
401ec4db | 306 | prt_printf(out, "Invalid superblock layout: superblocks overlap\n" |
efe68e1d KO |
307 | " (sb %u ends at %llu next starts at %llu", |
308 | i - 1, prev_offset + max_sectors, offset); | |
78c0b75c | 309 | return -BCH_ERR_invalid_sb_layout_superblocks_overlap; |
efe68e1d | 310 | } |
1c6fdbd8 KO |
311 | prev_offset = offset; |
312 | } | |
313 | ||
efe68e1d | 314 | return 0; |
1c6fdbd8 KO |
315 | } |
316 | ||
a02a0121 | 317 | static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out) |
1c6fdbd8 | 318 | { |
a02a0121 KO |
319 | u16 version = le16_to_cpu(sb->version); |
320 | u16 version_min = le16_to_cpu(sb->version_min); | |
321 | ||
322 | if (!bch2_version_compatible(version)) { | |
323 | prt_str(out, "Unsupported superblock version "); | |
324 | bch2_version_to_text(out, version); | |
325 | prt_str(out, " (min "); | |
326 | bch2_version_to_text(out, bcachefs_metadata_version_min); | |
327 | prt_str(out, ", max "); | |
328 | bch2_version_to_text(out, bcachefs_metadata_version_current); | |
329 | prt_str(out, ")"); | |
78c0b75c | 330 | return -BCH_ERR_invalid_sb_version; |
efe68e1d | 331 | } |
1c6fdbd8 | 332 | |
a02a0121 KO |
333 | if (!bch2_version_compatible(version_min)) { |
334 | prt_str(out, "Unsupported superblock version_min "); | |
335 | bch2_version_to_text(out, version_min); | |
336 | prt_str(out, " (min "); | |
337 | bch2_version_to_text(out, bcachefs_metadata_version_min); | |
338 | prt_str(out, ", max "); | |
339 | bch2_version_to_text(out, bcachefs_metadata_version_current); | |
340 | prt_str(out, ")"); | |
78c0b75c | 341 | return -BCH_ERR_invalid_sb_version; |
efe68e1d KO |
342 | } |
343 | ||
344 | if (version_min > version) { | |
a02a0121 KO |
345 | prt_str(out, "Bad minimum version "); |
346 | bch2_version_to_text(out, version_min); | |
347 | prt_str(out, ", greater than version field "); | |
348 | bch2_version_to_text(out, version); | |
78c0b75c | 349 | return -BCH_ERR_invalid_sb_version; |
efe68e1d | 350 | } |
26609b61 | 351 | |
a02a0121 KO |
352 | return 0; |
353 | } | |
354 | ||
355 | static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, | |
356 | int rw) | |
357 | { | |
358 | struct bch_sb *sb = disk_sb->sb; | |
359 | struct bch_sb_field *f; | |
9af26120 | 360 | struct bch_sb_field_members_v1 *mi; |
a02a0121 KO |
361 | enum bch_opt_id opt_id; |
362 | u16 block_size; | |
363 | int ret; | |
364 | ||
365 | ret = bch2_sb_compatible(sb, out); | |
366 | if (ret) | |
367 | return ret; | |
368 | ||
c258f28e | 369 | if (sb->features[1] || |
efe68e1d | 370 | (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) { |
401ec4db | 371 | prt_printf(out, "Filesystem has incompatible features"); |
78c0b75c | 372 | return -BCH_ERR_invalid_sb_features; |
efe68e1d | 373 | } |
c258f28e | 374 | |
1c6fdbd8 KO |
375 | block_size = le16_to_cpu(sb->block_size); |
376 | ||
efe68e1d | 377 | if (block_size > PAGE_SECTORS) { |
401ec4db | 378 | prt_printf(out, "Block size too big (got %u, max %u)", |
efe68e1d | 379 | block_size, PAGE_SECTORS); |
78c0b75c | 380 | return -BCH_ERR_invalid_sb_block_size; |
efe68e1d | 381 | } |
1c6fdbd8 | 382 | |
efe68e1d | 383 | if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) { |
401ec4db | 384 | prt_printf(out, "Bad user UUID (got zeroes)"); |
78c0b75c | 385 | return -BCH_ERR_invalid_sb_uuid; |
efe68e1d | 386 | } |
1c6fdbd8 | 387 | |
efe68e1d | 388 | if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid))) { |
6bf3766b | 389 | prt_printf(out, "Bad internal UUID (got zeroes)"); |
78c0b75c | 390 | return -BCH_ERR_invalid_sb_uuid; |
efe68e1d | 391 | } |
1c6fdbd8 KO |
392 | |
393 | if (!sb->nr_devices || | |
efe68e1d | 394 | sb->nr_devices > BCH_SB_MEMBERS_MAX) { |
401ec4db | 395 | prt_printf(out, "Bad number of member devices %u (max %u)", |
efe68e1d | 396 | sb->nr_devices, BCH_SB_MEMBERS_MAX); |
78c0b75c | 397 | return -BCH_ERR_invalid_sb_too_many_members; |
efe68e1d | 398 | } |
1c6fdbd8 | 399 | |
efe68e1d | 400 | if (sb->dev_idx >= sb->nr_devices) { |
401ec4db | 401 | prt_printf(out, "Bad dev_idx (got %u, nr_devices %u)", |
efe68e1d | 402 | sb->dev_idx, sb->nr_devices); |
78c0b75c | 403 | return -BCH_ERR_invalid_sb_dev_idx; |
efe68e1d | 404 | } |
1c6fdbd8 KO |
405 | |
406 | if (!sb->time_precision || | |
efe68e1d | 407 | le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) { |
401ec4db | 408 | prt_printf(out, "Invalid time precision: %u (min 1, max %lu)", |
efe68e1d | 409 | le32_to_cpu(sb->time_precision), NSEC_PER_SEC); |
78c0b75c | 410 | return -BCH_ERR_invalid_sb_time_precision; |
efe68e1d | 411 | } |
1c6fdbd8 | 412 | |
b8559f1a KO |
413 | if (rw == READ) { |
414 | /* | |
415 | * Been seeing a bug where these are getting inexplicably | |
a02a0121 | 416 | * zeroed, so we're now validating them, but we have to be |
b8559f1a KO |
417 | * careful not to preven people's filesystems from mounting: |
418 | */ | |
419 | if (!BCH_SB_JOURNAL_FLUSH_DELAY(sb)) | |
420 | SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000); | |
421 | if (!BCH_SB_JOURNAL_RECLAIM_DELAY(sb)) | |
422 | SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 1000); | |
813e0cec KO |
423 | |
424 | if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb)) | |
425 | SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version)); | |
b8559f1a KO |
426 | } |
427 | ||
63c4b254 KO |
428 | for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { |
429 | const struct bch_option *opt = bch2_opt_table + opt_id; | |
430 | ||
431 | if (opt->get_sb != BCH2_NO_SB_OPT) { | |
432 | u64 v = bch2_opt_from_sb(sb, opt_id); | |
433 | ||
401ec4db | 434 | prt_printf(out, "Invalid option "); |
63c4b254 KO |
435 | ret = bch2_opt_validate(opt, v, out); |
436 | if (ret) | |
437 | return ret; | |
438 | ||
439 | printbuf_reset(out); | |
440 | } | |
441 | } | |
442 | ||
1c6fdbd8 | 443 | /* validate layout */ |
efe68e1d KO |
444 | ret = validate_sb_layout(&sb->layout, out); |
445 | if (ret) | |
446 | return ret; | |
1c6fdbd8 KO |
447 | |
448 | vstruct_for_each(sb, f) { | |
efe68e1d | 449 | if (!f->u64s) { |
78c0b75c | 450 | prt_printf(out, "Invalid superblock: optional field with size 0 (type %u)", |
efe68e1d | 451 | le32_to_cpu(f->type)); |
78c0b75c | 452 | return -BCH_ERR_invalid_sb_field_size; |
efe68e1d | 453 | } |
1c6fdbd8 | 454 | |
efe68e1d | 455 | if (vstruct_next(f) > vstruct_last(sb)) { |
401ec4db | 456 | prt_printf(out, "Invalid superblock: optional field extends past end of superblock (type %u)", |
efe68e1d | 457 | le32_to_cpu(f->type)); |
78c0b75c | 458 | return -BCH_ERR_invalid_sb_field_size; |
efe68e1d | 459 | } |
1c6fdbd8 KO |
460 | } |
461 | ||
462 | /* members must be validated first: */ | |
4637429e | 463 | mi = bch2_sb_field_get(sb, members_v1); |
efe68e1d | 464 | if (!mi) { |
401ec4db | 465 | prt_printf(out, "Invalid superblock: member info area missing"); |
78c0b75c | 466 | return -BCH_ERR_invalid_sb_members_missing; |
efe68e1d | 467 | } |
1c6fdbd8 | 468 | |
efe68e1d KO |
469 | ret = bch2_sb_field_validate(sb, &mi->field, out); |
470 | if (ret) | |
471 | return ret; | |
1c6fdbd8 KO |
472 | |
473 | vstruct_for_each(sb, f) { | |
9af26120 | 474 | if (le32_to_cpu(f->type) == BCH_SB_FIELD_members_v1) |
1c6fdbd8 KO |
475 | continue; |
476 | ||
efe68e1d KO |
477 | ret = bch2_sb_field_validate(sb, f, out); |
478 | if (ret) | |
479 | return ret; | |
1c6fdbd8 KO |
480 | } |
481 | ||
efe68e1d | 482 | return 0; |
1c6fdbd8 KO |
483 | } |
484 | ||
485 | /* device open: */ | |
486 | ||
487 | static void bch2_sb_update(struct bch_fs *c) | |
488 | { | |
489 | struct bch_sb *src = c->disk_sb.sb; | |
1c6fdbd8 KO |
490 | struct bch_dev *ca; |
491 | unsigned i; | |
492 | ||
493 | lockdep_assert_held(&c->sb_lock); | |
494 | ||
495 | c->sb.uuid = src->uuid; | |
496 | c->sb.user_uuid = src->user_uuid; | |
26609b61 | 497 | c->sb.version = le16_to_cpu(src->version); |
84cc758d | 498 | c->sb.version_min = le16_to_cpu(src->version_min); |
813e0cec | 499 | c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src); |
1c6fdbd8 KO |
500 | c->sb.nr_devices = src->nr_devices; |
501 | c->sb.clean = BCH_SB_CLEAN(src); | |
502 | c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); | |
595c1e9b KO |
503 | |
504 | c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision); | |
505 | c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit; | |
506 | ||
507 | /* XXX this is wrong, we need a 96 or 128 bit integer type */ | |
508 | c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo), | |
509 | c->sb.nsec_per_time_unit); | |
1c6fdbd8 | 510 | c->sb.time_base_hi = le32_to_cpu(src->time_base_hi); |
595c1e9b | 511 | |
1c6fdbd8 | 512 | c->sb.features = le64_to_cpu(src->features[0]); |
1df42b57 | 513 | c->sb.compat = le64_to_cpu(src->compat[0]); |
1c6fdbd8 | 514 | |
1241df58 HS |
515 | for_each_member_device(ca, c, i) { |
516 | struct bch_member m = bch2_sb_member_get(src, i); | |
517 | ca->mi = bch2_mi_to_cpu(&m); | |
518 | } | |
1c6fdbd8 KO |
519 | } |
520 | ||
4bd4035e | 521 | static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) |
1c6fdbd8 KO |
522 | { |
523 | struct bch_sb_field *src_f, *dst_f; | |
524 | struct bch_sb *dst = dst_handle->sb; | |
af9d3bc2 | 525 | unsigned i; |
1c6fdbd8 KO |
526 | |
527 | dst->version = src->version; | |
26609b61 | 528 | dst->version_min = src->version_min; |
1c6fdbd8 KO |
529 | dst->seq = src->seq; |
530 | dst->uuid = src->uuid; | |
531 | dst->user_uuid = src->user_uuid; | |
532 | memcpy(dst->label, src->label, sizeof(dst->label)); | |
533 | ||
534 | dst->block_size = src->block_size; | |
535 | dst->nr_devices = src->nr_devices; | |
536 | ||
537 | dst->time_base_lo = src->time_base_lo; | |
538 | dst->time_base_hi = src->time_base_hi; | |
539 | dst->time_precision = src->time_precision; | |
540 | ||
541 | memcpy(dst->flags, src->flags, sizeof(dst->flags)); | |
542 | memcpy(dst->features, src->features, sizeof(dst->features)); | |
543 | memcpy(dst->compat, src->compat, sizeof(dst->compat)); | |
544 | ||
af9d3bc2 | 545 | for (i = 0; i < BCH_SB_FIELD_NR; i++) { |
4bd4035e KO |
546 | int d; |
547 | ||
25be2e5d | 548 | if ((1U << i) & BCH_SINGLE_DEVICE_SB_FIELDS) |
1c6fdbd8 KO |
549 | continue; |
550 | ||
4637429e KO |
551 | src_f = bch2_sb_field_get_id(src, i); |
552 | dst_f = bch2_sb_field_get_id(dst, i); | |
4bd4035e KO |
553 | |
554 | d = (src_f ? le32_to_cpu(src_f->u64s) : 0) - | |
555 | (dst_f ? le32_to_cpu(dst_f->u64s) : 0); | |
556 | if (d > 0) { | |
1e81f89b KO |
557 | int ret = bch2_sb_realloc(dst_handle, |
558 | le32_to_cpu(dst_handle->sb->u64s) + d); | |
559 | ||
4bd4035e KO |
560 | if (ret) |
561 | return ret; | |
562 | ||
563 | dst = dst_handle->sb; | |
4637429e | 564 | dst_f = bch2_sb_field_get_id(dst, i); |
4bd4035e KO |
565 | } |
566 | ||
1c6fdbd8 | 567 | dst_f = __bch2_sb_field_resize(dst_handle, dst_f, |
af9d3bc2 | 568 | src_f ? le32_to_cpu(src_f->u64s) : 0); |
1c6fdbd8 | 569 | |
af9d3bc2 KO |
570 | if (src_f) |
571 | memcpy(dst_f, src_f, vstruct_bytes(src_f)); | |
1c6fdbd8 | 572 | } |
4bd4035e KO |
573 | |
574 | return 0; | |
1c6fdbd8 KO |
575 | } |
576 | ||
577 | int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src) | |
578 | { | |
1c6fdbd8 KO |
579 | int ret; |
580 | ||
581 | lockdep_assert_held(&c->sb_lock); | |
582 | ||
4bd4035e KO |
583 | ret = bch2_sb_realloc(&c->disk_sb, 0) ?: |
584 | __copy_super(&c->disk_sb, src) ?: | |
585 | bch2_sb_replicas_to_cpu_replicas(c) ?: | |
586 | bch2_sb_disk_groups_to_cpu(c); | |
1c6fdbd8 KO |
587 | if (ret) |
588 | return ret; | |
589 | ||
590 | bch2_sb_update(c); | |
591 | return 0; | |
592 | } | |
593 | ||
594 | int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) | |
595 | { | |
4bd4035e | 596 | return __copy_super(&ca->disk_sb, c->disk_sb.sb); |
1c6fdbd8 KO |
597 | } |
598 | ||
599 | /* read superblock: */ | |
600 | ||
efe68e1d | 601 | static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err) |
1c6fdbd8 KO |
602 | { |
603 | struct bch_csum csum; | |
604 | size_t bytes; | |
efe68e1d | 605 | int ret; |
1c6fdbd8 KO |
606 | reread: |
607 | bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META); | |
608 | sb->bio->bi_iter.bi_sector = offset; | |
9d8022db | 609 | bch2_bio_map(sb->bio, sb->sb, sb->buffer_size); |
1c6fdbd8 | 610 | |
efe68e1d KO |
611 | ret = submit_bio_wait(sb->bio); |
612 | if (ret) { | |
401ec4db | 613 | prt_printf(err, "IO error: %i", ret); |
efe68e1d KO |
614 | return ret; |
615 | } | |
1c6fdbd8 KO |
616 | |
617 | if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) && | |
efe68e1d | 618 | !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) { |
401ec4db | 619 | prt_printf(err, "Not a bcachefs superblock"); |
78c0b75c | 620 | return -BCH_ERR_invalid_sb_magic; |
efe68e1d | 621 | } |
1c6fdbd8 | 622 | |
a02a0121 KO |
623 | ret = bch2_sb_compatible(sb->sb, err); |
624 | if (ret) | |
625 | return ret; | |
1c6fdbd8 KO |
626 | |
627 | bytes = vstruct_bytes(sb->sb); | |
628 | ||
efe68e1d | 629 | if (bytes > 512 << sb->sb->layout.sb_max_size_bits) { |
401ec4db | 630 | prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)", |
efe68e1d | 631 | bytes, 512UL << sb->sb->layout.sb_max_size_bits); |
78c0b75c | 632 | return -BCH_ERR_invalid_sb_too_big; |
efe68e1d | 633 | } |
1c6fdbd8 | 634 | |
9d8022db | 635 | if (bytes > sb->buffer_size) { |
65d48e35 KO |
636 | ret = bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s)); |
637 | if (ret) | |
638 | return ret; | |
1c6fdbd8 KO |
639 | goto reread; |
640 | } | |
641 | ||
efe68e1d | 642 | if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) { |
401ec4db | 643 | prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb)); |
78c0b75c | 644 | return -BCH_ERR_invalid_sb_csum_type; |
efe68e1d | 645 | } |
1c6fdbd8 KO |
646 | |
647 | /* XXX: verify MACs */ | |
648 | csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb), | |
649 | null_nonce(), sb->sb); | |
650 | ||
efe68e1d | 651 | if (bch2_crc_cmp(csum, sb->sb->csum)) { |
401ec4db | 652 | prt_printf(err, "bad checksum"); |
78c0b75c | 653 | return -BCH_ERR_invalid_sb_csum; |
efe68e1d | 654 | } |
1c6fdbd8 | 655 | |
03e183cb KO |
656 | sb->seq = le64_to_cpu(sb->sb->seq); |
657 | ||
efe68e1d | 658 | return 0; |
1c6fdbd8 KO |
659 | } |
660 | ||
661 | int bch2_read_super(const char *path, struct bch_opts *opts, | |
662 | struct bch_sb_handle *sb) | |
663 | { | |
664 | u64 offset = opt_get(*opts, sb); | |
665 | struct bch_sb_layout layout; | |
fa8e94fa | 666 | struct printbuf err = PRINTBUF; |
1c6fdbd8 KO |
667 | __le64 *i; |
668 | int ret; | |
f39d1aca KO |
669 | #ifndef __KERNEL__ |
670 | retry: | |
671 | #endif | |
1c6fdbd8 KO |
672 | memset(sb, 0, sizeof(*sb)); |
673 | sb->mode = BLK_OPEN_READ; | |
674 | sb->have_bio = true; | |
675 | sb->holder = kmalloc(1, GFP_KERNEL); | |
676 | if (!sb->holder) | |
677 | return -ENOMEM; | |
678 | ||
63807d95 BF |
679 | sb->sb_name = kstrdup(path, GFP_KERNEL); |
680 | if (!sb->sb_name) | |
681 | return -ENOMEM; | |
682 | ||
f39d1aca KO |
683 | #ifndef __KERNEL__ |
684 | if (opt_get(*opts, direct_io) == false) | |
793a06d9 | 685 | sb->mode |= BLK_OPEN_BUFFERED; |
f39d1aca KO |
686 | #endif |
687 | ||
1c6fdbd8 KO |
688 | if (!opt_get(*opts, noexcl)) |
689 | sb->mode |= BLK_OPEN_EXCL; | |
690 | ||
691 | if (!opt_get(*opts, nochanges)) | |
692 | sb->mode |= BLK_OPEN_WRITE; | |
693 | ||
694 | sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops); | |
695 | if (IS_ERR(sb->bdev) && | |
696 | PTR_ERR(sb->bdev) == -EACCES && | |
697 | opt_get(*opts, read_only)) { | |
698 | sb->mode &= ~BLK_OPEN_WRITE; | |
699 | ||
700 | sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops); | |
701 | if (!IS_ERR(sb->bdev)) | |
702 | opt_set(*opts, nochanges, true); | |
703 | } | |
704 | ||
705 | if (IS_ERR(sb->bdev)) { | |
706 | ret = PTR_ERR(sb->bdev); | |
707 | goto out; | |
708 | } | |
709 | ||
1c6fdbd8 | 710 | ret = bch2_sb_realloc(sb, 0); |
efe68e1d | 711 | if (ret) { |
401ec4db | 712 | prt_printf(&err, "error allocating memory for superblock"); |
1c6fdbd8 | 713 | goto err; |
efe68e1d | 714 | } |
1c6fdbd8 | 715 | |
efe68e1d | 716 | if (bch2_fs_init_fault("read_super")) { |
401ec4db | 717 | prt_printf(&err, "dynamic fault"); |
efe68e1d | 718 | ret = -EFAULT; |
1c6fdbd8 | 719 | goto err; |
efe68e1d | 720 | } |
1c6fdbd8 | 721 | |
efe68e1d KO |
722 | ret = read_one_super(sb, offset, &err); |
723 | if (!ret) | |
1c6fdbd8 KO |
724 | goto got_super; |
725 | ||
726 | if (opt_defined(*opts, sb)) | |
727 | goto err; | |
728 | ||
01ccee22 | 729 | printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s\n", |
fa8e94fa KO |
730 | path, err.buf); |
731 | printbuf_reset(&err); | |
1c6fdbd8 KO |
732 | |
733 | /* | |
734 | * Error reading primary superblock - read location of backup | |
735 | * superblocks: | |
736 | */ | |
737 | bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META); | |
738 | sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR; | |
1c6fdbd8 KO |
739 | /* |
740 | * use sb buffer to read layout, since sb buffer is page aligned but | |
741 | * layout won't be: | |
742 | */ | |
885678f6 | 743 | bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout)); |
1c6fdbd8 | 744 | |
efe68e1d KO |
745 | ret = submit_bio_wait(sb->bio); |
746 | if (ret) { | |
401ec4db | 747 | prt_printf(&err, "IO error: %i", ret); |
1c6fdbd8 | 748 | goto err; |
efe68e1d | 749 | } |
1c6fdbd8 KO |
750 | |
751 | memcpy(&layout, sb->sb, sizeof(layout)); | |
efe68e1d KO |
752 | ret = validate_sb_layout(&layout, &err); |
753 | if (ret) | |
1c6fdbd8 KO |
754 | goto err; |
755 | ||
756 | for (i = layout.sb_offset; | |
757 | i < layout.sb_offset + layout.nr_superblocks; i++) { | |
758 | offset = le64_to_cpu(*i); | |
759 | ||
760 | if (offset == opt_get(*opts, sb)) | |
761 | continue; | |
762 | ||
efe68e1d KO |
763 | ret = read_one_super(sb, offset, &err); |
764 | if (!ret) | |
1c6fdbd8 KO |
765 | goto got_super; |
766 | } | |
767 | ||
1c6fdbd8 KO |
768 | goto err; |
769 | ||
770 | got_super: | |
1c6fdbd8 | 771 | if (le16_to_cpu(sb->sb->block_size) << 9 < |
f39d1aca KO |
772 | bdev_logical_block_size(sb->bdev) && |
773 | opt_get(*opts, direct_io)) { | |
774 | #ifndef __KERNEL__ | |
775 | opt_set(*opts, direct_io, false); | |
776 | bch2_free_super(sb); | |
777 | goto retry; | |
778 | #endif | |
401ec4db | 779 | prt_printf(&err, "block size (%u) smaller than device block size (%u)", |
8244f320 KO |
780 | le16_to_cpu(sb->sb->block_size) << 9, |
781 | bdev_logical_block_size(sb->bdev)); | |
78c0b75c | 782 | ret = -BCH_ERR_block_size_too_small; |
efe68e1d | 783 | goto err; |
8244f320 | 784 | } |
1c6fdbd8 KO |
785 | |
786 | ret = 0; | |
787 | sb->have_layout = true; | |
efe68e1d | 788 | |
b8559f1a | 789 | ret = bch2_sb_validate(sb, &err, READ); |
efe68e1d | 790 | if (ret) { |
01ccee22 | 791 | printk(KERN_ERR "bcachefs (%s): error validating superblock: %s\n", |
fa8e94fa | 792 | path, err.buf); |
efe68e1d KO |
793 | goto err_no_print; |
794 | } | |
1c6fdbd8 | 795 | out: |
fa8e94fa | 796 | printbuf_exit(&err); |
1c6fdbd8 KO |
797 | return ret; |
798 | err: | |
01ccee22 | 799 | printk(KERN_ERR "bcachefs (%s): error reading superblock: %s\n", |
fa8e94fa | 800 | path, err.buf); |
8244f320 KO |
801 | err_no_print: |
802 | bch2_free_super(sb); | |
1c6fdbd8 KO |
803 | goto out; |
804 | } | |
805 | ||
806 | /* write superblock: */ | |
807 | ||
808 | static void write_super_endio(struct bio *bio) | |
809 | { | |
810 | struct bch_dev *ca = bio->bi_private; | |
811 | ||
812 | /* XXX: return errors directly */ | |
813 | ||
94119eeb KO |
814 | if (bch2_dev_io_err_on(bio->bi_status, ca, |
815 | bio_data_dir(bio) | |
816 | ? BCH_MEMBER_ERROR_write | |
817 | : BCH_MEMBER_ERROR_read, | |
818 | "superblock %s error: %s", | |
819 | bio_data_dir(bio) ? "write" : "read", | |
63b214e7 | 820 | bch2_blk_status_to_str(bio->bi_status))) |
1c6fdbd8 KO |
821 | ca->sb_write_error = 1; |
822 | ||
823 | closure_put(&ca->fs->sb_write); | |
824 | percpu_ref_put(&ca->io_ref); | |
825 | } | |
826 | ||
03e183cb KO |
827 | static void read_back_super(struct bch_fs *c, struct bch_dev *ca) |
828 | { | |
829 | struct bch_sb *sb = ca->disk_sb.sb; | |
830 | struct bio *bio = ca->disk_sb.bio; | |
831 | ||
832 | bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META); | |
833 | bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); | |
03e183cb KO |
834 | bio->bi_end_io = write_super_endio; |
835 | bio->bi_private = ca; | |
885678f6 | 836 | bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE); |
03e183cb | 837 | |
89fd25be | 838 | this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], |
03e183cb KO |
839 | bio_sectors(bio)); |
840 | ||
841 | percpu_ref_get(&ca->io_ref); | |
842 | closure_bio_submit(bio, &c->sb_write); | |
843 | } | |
844 | ||
1c6fdbd8 KO |
845 | static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) |
846 | { | |
847 | struct bch_sb *sb = ca->disk_sb.sb; | |
848 | struct bio *bio = ca->disk_sb.bio; | |
849 | ||
850 | sb->offset = sb->layout.sb_offset[idx]; | |
851 | ||
120f63e3 | 852 | SET_BCH_SB_CSUM_TYPE(sb, bch2_csum_opt_to_type(c->opts.metadata_checksum, false)); |
1c6fdbd8 KO |
853 | sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), |
854 | null_nonce(), sb); | |
855 | ||
856 | bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META); | |
857 | bio->bi_iter.bi_sector = le64_to_cpu(sb->offset); | |
1c6fdbd8 KO |
858 | bio->bi_end_io = write_super_endio; |
859 | bio->bi_private = ca; | |
885678f6 KO |
860 | bch2_bio_map(bio, sb, |
861 | roundup((size_t) vstruct_bytes(sb), | |
862 | bdev_logical_block_size(ca->disk_sb.bdev))); | |
1c6fdbd8 | 863 | |
89fd25be | 864 | this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb], |
1c6fdbd8 KO |
865 | bio_sectors(bio)); |
866 | ||
867 | percpu_ref_get(&ca->io_ref); | |
868 | closure_bio_submit(bio, &c->sb_write); | |
869 | } | |
870 | ||
03e183cb | 871 | int bch2_write_super(struct bch_fs *c) |
1c6fdbd8 KO |
872 | { |
873 | struct closure *cl = &c->sb_write; | |
874 | struct bch_dev *ca; | |
fa8e94fa | 875 | struct printbuf err = PRINTBUF; |
1c6fdbd8 | 876 | unsigned i, sb = 0, nr_wrote; |
1c6fdbd8 KO |
877 | struct bch_devs_mask sb_written; |
878 | bool wrote, can_mount_without_written, can_mount_with_written; | |
98f2197d | 879 | unsigned degraded_flags = BCH_FORCE_IF_DEGRADED; |
03e183cb | 880 | int ret = 0; |
1c6fdbd8 | 881 | |
674cfc26 | 882 | trace_and_count(c, write_super, c, _RET_IP_); |
4254f5bf | 883 | |
98f2197d KO |
884 | if (c->opts.very_degraded) |
885 | degraded_flags |= BCH_FORCE_IF_LOST; | |
886 | ||
1c6fdbd8 KO |
887 | lockdep_assert_held(&c->sb_lock); |
888 | ||
889 | closure_init_stack(cl); | |
890 | memset(&sb_written, 0, sizeof(sb_written)); | |
891 | ||
ba8eeae8 KO |
892 | /* Make sure we're using the new magic numbers: */ |
893 | c->disk_sb.sb->magic = BCHFS_MAGIC; | |
894 | c->disk_sb.sb->layout.magic = BCHFS_MAGIC; | |
e1538212 | 895 | |
1c6fdbd8 KO |
896 | le64_add_cpu(&c->disk_sb.sb->seq, 1); |
897 | ||
0bc166ff KO |
898 | if (test_bit(BCH_FS_ERROR, &c->flags)) |
899 | SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1); | |
aae15aaf KO |
900 | if (test_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags)) |
901 | SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1); | |
0bc166ff | 902 | |
7d6f07ed KO |
903 | SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); |
904 | ||
104c6974 | 905 | bch2_sb_counters_from_cpu(c); |
f5d26fa3 | 906 | bch2_sb_members_from_cpu(c); |
94119eeb | 907 | bch2_sb_members_cpy_v2_v1(&c->disk_sb); |
f5d26fa3 | 908 | bch2_sb_errors_from_cpu(c); |
104c6974 | 909 | |
1c6fdbd8 KO |
910 | for_each_online_member(ca, c, i) |
911 | bch2_sb_from_fs(c, ca); | |
912 | ||
913 | for_each_online_member(ca, c, i) { | |
fa8e94fa | 914 | printbuf_reset(&err); |
efe68e1d | 915 | |
b8559f1a | 916 | ret = bch2_sb_validate(&ca->disk_sb, &err, WRITE); |
efe68e1d | 917 | if (ret) { |
fa8e94fa | 918 | bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf); |
efe68e1d | 919 | percpu_ref_put(&ca->io_ref); |
1c6fdbd8 KO |
920 | goto out; |
921 | } | |
922 | } | |
923 | ||
0bc166ff | 924 | if (c->opts.nochanges) |
1c6fdbd8 KO |
925 | goto out; |
926 | ||
80c80164 KO |
927 | /* |
928 | * Defer writing the superblock until filesystem initialization is | |
929 | * complete - don't write out a partly initialized superblock: | |
930 | */ | |
931 | if (!BCH_SB_INITIALIZED(c->disk_sb.sb)) | |
932 | goto out; | |
933 | ||
1c6fdbd8 KO |
934 | for_each_online_member(ca, c, i) { |
935 | __set_bit(ca->dev_idx, sb_written.d); | |
936 | ca->sb_write_error = 0; | |
937 | } | |
938 | ||
03e183cb KO |
939 | for_each_online_member(ca, c, i) |
940 | read_back_super(c, ca); | |
941 | closure_sync(cl); | |
942 | ||
943 | for_each_online_member(ca, c, i) { | |
bf159463 KO |
944 | if (ca->sb_write_error) |
945 | continue; | |
946 | ||
947 | if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) { | |
948 | bch2_fs_fatal_error(c, | |
949 | "Superblock write was silently dropped! (seq %llu expected %llu)", | |
950 | le64_to_cpu(ca->sb_read_scratch->seq), | |
951 | ca->disk_sb.seq); | |
952 | percpu_ref_put(&ca->io_ref); | |
858536c7 | 953 | ret = -BCH_ERR_erofs_sb_err; |
bf159463 KO |
954 | goto out; |
955 | } | |
956 | ||
957 | if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) { | |
03e183cb | 958 | bch2_fs_fatal_error(c, |
bf159463 KO |
959 | "Superblock modified by another process (seq %llu expected %llu)", |
960 | le64_to_cpu(ca->sb_read_scratch->seq), | |
961 | ca->disk_sb.seq); | |
03e183cb | 962 | percpu_ref_put(&ca->io_ref); |
858536c7 | 963 | ret = -BCH_ERR_erofs_sb_err; |
03e183cb KO |
964 | goto out; |
965 | } | |
966 | } | |
967 | ||
1c6fdbd8 KO |
968 | do { |
969 | wrote = false; | |
970 | for_each_online_member(ca, c, i) | |
03e183cb KO |
971 | if (!ca->sb_write_error && |
972 | sb < ca->disk_sb.sb->layout.nr_superblocks) { | |
1c6fdbd8 KO |
973 | write_one_super(c, ca, sb); |
974 | wrote = true; | |
975 | } | |
976 | closure_sync(cl); | |
977 | sb++; | |
978 | } while (wrote); | |
979 | ||
03e183cb | 980 | for_each_online_member(ca, c, i) { |
1c6fdbd8 KO |
981 | if (ca->sb_write_error) |
982 | __clear_bit(ca->dev_idx, sb_written.d); | |
03e183cb KO |
983 | else |
984 | ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq); | |
985 | } | |
1c6fdbd8 KO |
986 | |
987 | nr_wrote = dev_mask_nr(&sb_written); | |
988 | ||
989 | can_mount_with_written = | |
98f2197d | 990 | bch2_have_enough_devs(c, sb_written, degraded_flags, false); |
1c6fdbd8 KO |
991 | |
992 | for (i = 0; i < ARRAY_SIZE(sb_written.d); i++) | |
993 | sb_written.d[i] = ~sb_written.d[i]; | |
994 | ||
995 | can_mount_without_written = | |
98f2197d | 996 | bch2_have_enough_devs(c, sb_written, degraded_flags, false); |
1c6fdbd8 KO |
997 | |
998 | /* | |
999 | * If we would be able to mount _without_ the devices we successfully | |
1000 | * wrote superblocks to, we weren't able to write to enough devices: | |
1001 | * | |
1002 | * Exception: if we can mount without the successes because we haven't | |
1003 | * written anything (new filesystem), we continue if we'd be able to | |
1004 | * mount with the devices we did successfully write to: | |
1005 | */ | |
03e183cb | 1006 | if (bch2_fs_fatal_err_on(!nr_wrote || |
fcb3431b | 1007 | !can_mount_with_written || |
03e183cb KO |
1008 | (can_mount_without_written && |
1009 | !can_mount_with_written), c, | |
d647db31 KO |
1010 | "Unable to write superblock to sufficient devices (from %ps)", |
1011 | (void *) _RET_IP_)) | |
03e183cb | 1012 | ret = -1; |
1c6fdbd8 KO |
1013 | out: |
1014 | /* Make new options visible after they're persistent: */ | |
1015 | bch2_sb_update(c); | |
fa8e94fa | 1016 | printbuf_exit(&err); |
03e183cb | 1017 | return ret; |
1c6fdbd8 KO |
1018 | } |
1019 | ||
ba239c95 KO |
1020 | void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) |
1021 | { | |
1022 | mutex_lock(&c->sb_lock); | |
1023 | if (!(c->sb.features & (1ULL << feat))) { | |
1024 | c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat); | |
1025 | ||
1026 | bch2_write_super(c); | |
1027 | } | |
1028 | mutex_unlock(&c->sb_lock); | |
1029 | } | |
1030 | ||
6619d846 KO |
1031 | /* Downgrade if superblock is at a higher version than currently supported: */ |
1032 | void bch2_sb_maybe_downgrade(struct bch_fs *c) | |
1c6fdbd8 | 1033 | { |
6619d846 | 1034 | lockdep_assert_held(&c->sb_lock); |
24964e1c | 1035 | |
ba8eeae8 KO |
1036 | /* |
1037 | * Downgrade, if superblock is at a higher version than currently | |
1038 | * supported: | |
1039 | */ | |
24964e1c KO |
1040 | if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) |
1041 | SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); | |
ba8eeae8 | 1042 | if (c->sb.version > bcachefs_metadata_version_current) |
24964e1c | 1043 | c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); |
ba8eeae8 KO |
1044 | if (c->sb.version_min > bcachefs_metadata_version_current) |
1045 | c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current); | |
1046 | c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); | |
6619d846 KO |
1047 | } |
1048 | ||
1049 | void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) | |
1050 | { | |
1051 | lockdep_assert_held(&c->sb_lock); | |
24964e1c | 1052 | |
6619d846 KO |
1053 | c->disk_sb.sb->version = cpu_to_le16(new_version); |
1054 | c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); | |
1055 | } | |
1056 | ||
1c6fdbd8 KO |
1057 | static const struct bch_sb_field_ops *bch2_sb_field_ops[] = { |
1058 | #define x(f, nr) \ | |
1059 | [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f, | |
1060 | BCH_SB_FIELDS() | |
1061 | #undef x | |
1062 | }; | |
1063 | ||
b9129136 | 1064 | static const struct bch_sb_field_ops bch2_sb_field_null_ops; |
236b68da KO |
1065 | |
1066 | static const struct bch_sb_field_ops *bch2_sb_field_type_ops(unsigned type) | |
1067 | { | |
1068 | return likely(type < ARRAY_SIZE(bch2_sb_field_ops)) | |
1069 | ? bch2_sb_field_ops[type] | |
1070 | : &bch2_sb_field_null_ops; | |
1071 | } | |
1072 | ||
efe68e1d | 1073 | static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, |
7db4cbd0 | 1074 | struct printbuf *err) |
1c6fdbd8 KO |
1075 | { |
1076 | unsigned type = le32_to_cpu(f->type); | |
7db4cbd0 | 1077 | struct printbuf field_err = PRINTBUF; |
236b68da | 1078 | const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type); |
efe68e1d | 1079 | int ret; |
1c6fdbd8 | 1080 | |
236b68da | 1081 | ret = ops->validate ? ops->validate(sb, f, &field_err) : 0; |
efe68e1d | 1082 | if (ret) { |
401ec4db | 1083 | prt_printf(err, "Invalid superblock section %s: %s", |
236b68da | 1084 | bch2_sb_fields[type], field_err.buf); |
401ec4db | 1085 | prt_newline(err); |
7db4cbd0 | 1086 | bch2_sb_field_to_text(err, sb, f); |
efe68e1d KO |
1087 | } |
1088 | ||
7db4cbd0 | 1089 | printbuf_exit(&field_err); |
efe68e1d | 1090 | return ret; |
1c6fdbd8 KO |
1091 | } |
1092 | ||
319f9ac3 KO |
1093 | void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, |
1094 | struct bch_sb_field *f) | |
1c6fdbd8 KO |
1095 | { |
1096 | unsigned type = le32_to_cpu(f->type); | |
236b68da | 1097 | const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type); |
1c6fdbd8 | 1098 | |
401ec4db KO |
1099 | if (!out->nr_tabstops) |
1100 | printbuf_tabstop_push(out, 32); | |
5521b1df | 1101 | |
236b68da | 1102 | if (type < BCH_SB_FIELD_NR) |
401ec4db | 1103 | prt_printf(out, "%s", bch2_sb_fields[type]); |
319f9ac3 | 1104 | else |
401ec4db | 1105 | prt_printf(out, "(unknown field %u)", type); |
319f9ac3 | 1106 | |
401ec4db KO |
1107 | prt_printf(out, " (size %zu):", vstruct_bytes(f)); |
1108 | prt_newline(out); | |
1c6fdbd8 | 1109 | |
236b68da | 1110 | if (ops->to_text) { |
401ec4db | 1111 | printbuf_indent_add(out, 2); |
236b68da | 1112 | ops->to_text(out, sb, f); |
401ec4db | 1113 | printbuf_indent_sub(out, 2); |
12bf93a4 KO |
1114 | } |
1115 | } | |
1116 | ||
1117 | void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l) | |
1118 | { | |
1119 | unsigned i; | |
1120 | ||
401ec4db KO |
1121 | prt_printf(out, "Type: %u", l->layout_type); |
1122 | prt_newline(out); | |
12bf93a4 | 1123 | |
401ec4db KO |
1124 | prt_str(out, "Superblock max size: "); |
1125 | prt_units_u64(out, 512 << l->sb_max_size_bits); | |
1126 | prt_newline(out); | |
12bf93a4 | 1127 | |
401ec4db KO |
1128 | prt_printf(out, "Nr superblocks: %u", l->nr_superblocks); |
1129 | prt_newline(out); | |
12bf93a4 | 1130 | |
401ec4db | 1131 | prt_str(out, "Offsets: "); |
12bf93a4 KO |
1132 | for (i = 0; i < l->nr_superblocks; i++) { |
1133 | if (i) | |
401ec4db KO |
1134 | prt_str(out, ", "); |
1135 | prt_printf(out, "%llu", le64_to_cpu(l->sb_offset[i])); | |
12bf93a4 | 1136 | } |
401ec4db | 1137 | prt_newline(out); |
12bf93a4 KO |
1138 | } |
1139 | ||
1140 | void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, | |
1141 | bool print_layout, unsigned fields) | |
1142 | { | |
12bf93a4 KO |
1143 | struct bch_sb_field *f; |
1144 | u64 fields_have = 0; | |
1145 | unsigned nr_devices = 0; | |
1146 | ||
401ec4db | 1147 | if (!out->nr_tabstops) |
5a82c7c7 | 1148 | printbuf_tabstop_push(out, 44); |
5521b1df | 1149 | |
1241df58 HS |
1150 | for (int i = 0; i < sb->nr_devices; i++) |
1151 | nr_devices += bch2_dev_exists(sb, i); | |
12bf93a4 | 1152 | |
401ec4db KO |
1153 | prt_printf(out, "External UUID:"); |
1154 | prt_tab(out); | |
12bf93a4 | 1155 | pr_uuid(out, sb->user_uuid.b); |
401ec4db | 1156 | prt_newline(out); |
12bf93a4 | 1157 | |
401ec4db KO |
1158 | prt_printf(out, "Internal UUID:"); |
1159 | prt_tab(out); | |
12bf93a4 | 1160 | pr_uuid(out, sb->uuid.b); |
401ec4db KO |
1161 | prt_newline(out); |
1162 | ||
1163 | prt_str(out, "Device index:"); | |
1164 | prt_tab(out); | |
1165 | prt_printf(out, "%u", sb->dev_idx); | |
1166 | prt_newline(out); | |
1167 | ||
1168 | prt_str(out, "Label:"); | |
1169 | prt_tab(out); | |
1170 | prt_printf(out, "%.*s", (int) sizeof(sb->label), sb->label); | |
1171 | prt_newline(out); | |
1172 | ||
1173 | prt_str(out, "Version:"); | |
1174 | prt_tab(out); | |
e3804b55 | 1175 | bch2_version_to_text(out, le16_to_cpu(sb->version)); |
401ec4db KO |
1176 | prt_newline(out); |
1177 | ||
24964e1c KO |
1178 | prt_str(out, "Version upgrade complete:"); |
1179 | prt_tab(out); | |
1180 | bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb)); | |
1181 | prt_newline(out); | |
1182 | ||
401ec4db KO |
1183 | prt_printf(out, "Oldest version on disk:"); |
1184 | prt_tab(out); | |
e3804b55 | 1185 | bch2_version_to_text(out, le16_to_cpu(sb->version_min)); |
401ec4db KO |
1186 | prt_newline(out); |
1187 | ||
1188 | prt_printf(out, "Created:"); | |
1189 | prt_tab(out); | |
12bf93a4 | 1190 | if (sb->time_base_lo) |
59154f2c | 1191 | bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC)); |
12bf93a4 | 1192 | else |
401ec4db KO |
1193 | prt_printf(out, "(not set)"); |
1194 | prt_newline(out); | |
1195 | ||
1196 | prt_printf(out, "Sequence number:"); | |
1197 | prt_tab(out); | |
1198 | prt_printf(out, "%llu", le64_to_cpu(sb->seq)); | |
1199 | prt_newline(out); | |
1200 | ||
1201 | prt_printf(out, "Superblock size:"); | |
1202 | prt_tab(out); | |
1203 | prt_printf(out, "%zu", vstruct_bytes(sb)); | |
1204 | prt_newline(out); | |
1205 | ||
1206 | prt_printf(out, "Clean:"); | |
1207 | prt_tab(out); | |
1208 | prt_printf(out, "%llu", BCH_SB_CLEAN(sb)); | |
1209 | prt_newline(out); | |
1210 | ||
1211 | prt_printf(out, "Devices:"); | |
1212 | prt_tab(out); | |
1213 | prt_printf(out, "%u", nr_devices); | |
1214 | prt_newline(out); | |
1215 | ||
1216 | prt_printf(out, "Sections:"); | |
5521b1df KO |
1217 | vstruct_for_each(sb, f) |
1218 | fields_have |= 1 << le32_to_cpu(f->type); | |
401ec4db KO |
1219 | prt_tab(out); |
1220 | prt_bitflags(out, bch2_sb_fields, fields_have); | |
1221 | prt_newline(out); | |
1222 | ||
1223 | prt_printf(out, "Features:"); | |
1224 | prt_tab(out); | |
1225 | prt_bitflags(out, bch2_sb_features, le64_to_cpu(sb->features[0])); | |
1226 | prt_newline(out); | |
1227 | ||
1228 | prt_printf(out, "Compat features:"); | |
1229 | prt_tab(out); | |
1230 | prt_bitflags(out, bch2_sb_compat, le64_to_cpu(sb->compat[0])); | |
1231 | prt_newline(out); | |
1232 | ||
1233 | prt_newline(out); | |
1234 | prt_printf(out, "Options:"); | |
1235 | prt_newline(out); | |
1236 | printbuf_indent_add(out, 2); | |
5521b1df KO |
1237 | { |
1238 | enum bch_opt_id id; | |
12bf93a4 | 1239 | |
5521b1df KO |
1240 | for (id = 0; id < bch2_opts_nr; id++) { |
1241 | const struct bch_option *opt = bch2_opt_table + id; | |
12bf93a4 | 1242 | |
5521b1df KO |
1243 | if (opt->get_sb != BCH2_NO_SB_OPT) { |
1244 | u64 v = bch2_opt_from_sb(sb, id); | |
12bf93a4 | 1245 | |
401ec4db KO |
1246 | prt_printf(out, "%s:", opt->attr.name); |
1247 | prt_tab(out); | |
5521b1df KO |
1248 | bch2_opt_to_text(out, NULL, sb, opt, v, |
1249 | OPT_HUMAN_READABLE|OPT_SHOW_FULL_LIST); | |
401ec4db | 1250 | prt_newline(out); |
5521b1df KO |
1251 | } |
1252 | } | |
1253 | } | |
12bf93a4 | 1254 | |
401ec4db | 1255 | printbuf_indent_sub(out, 2); |
12bf93a4 KO |
1256 | |
1257 | if (print_layout) { | |
401ec4db KO |
1258 | prt_newline(out); |
1259 | prt_printf(out, "layout:"); | |
1260 | prt_newline(out); | |
1261 | printbuf_indent_add(out, 2); | |
12bf93a4 | 1262 | bch2_sb_layout_to_text(out, &sb->layout); |
401ec4db | 1263 | printbuf_indent_sub(out, 2); |
12bf93a4 KO |
1264 | } |
1265 | ||
1266 | vstruct_for_each(sb, f) | |
1267 | if (fields & (1 << le32_to_cpu(f->type))) { | |
401ec4db | 1268 | prt_newline(out); |
12bf93a4 KO |
1269 | bch2_sb_field_to_text(out, sb, f); |
1270 | } | |
1c6fdbd8 | 1271 | } |