]> git.ipfire.org Git - thirdparty/linux.git/blob - fs/btrfs/super.c
btrfs: handle the ro->rw transition for mounting different subvolumes
[thirdparty/linux.git] / fs / btrfs / super.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2007 Oracle. All rights reserved.
4 */
5
6 #include <linux/blkdev.h>
7 #include <linux/module.h>
8 #include <linux/fs.h>
9 #include <linux/pagemap.h>
10 #include <linux/highmem.h>
11 #include <linux/time.h>
12 #include <linux/init.h>
13 #include <linux/seq_file.h>
14 #include <linux/string.h>
15 #include <linux/backing-dev.h>
16 #include <linux/mount.h>
17 #include <linux/writeback.h>
18 #include <linux/statfs.h>
19 #include <linux/compat.h>
20 #include <linux/parser.h>
21 #include <linux/ctype.h>
22 #include <linux/namei.h>
23 #include <linux/miscdevice.h>
24 #include <linux/magic.h>
25 #include <linux/slab.h>
26 #include <linux/ratelimit.h>
27 #include <linux/crc32c.h>
28 #include <linux/btrfs.h>
29 #include <linux/security.h>
30 #include <linux/fs_parser.h>
31 #include "messages.h"
32 #include "delayed-inode.h"
33 #include "ctree.h"
34 #include "disk-io.h"
35 #include "transaction.h"
36 #include "btrfs_inode.h"
37 #include "print-tree.h"
38 #include "props.h"
39 #include "xattr.h"
40 #include "bio.h"
41 #include "export.h"
42 #include "compression.h"
43 #include "rcu-string.h"
44 #include "dev-replace.h"
45 #include "free-space-cache.h"
46 #include "backref.h"
47 #include "space-info.h"
48 #include "sysfs.h"
49 #include "zoned.h"
50 #include "tests/btrfs-tests.h"
51 #include "block-group.h"
52 #include "discard.h"
53 #include "qgroup.h"
54 #include "raid56.h"
55 #include "fs.h"
56 #include "accessors.h"
57 #include "defrag.h"
58 #include "dir-item.h"
59 #include "ioctl.h"
60 #include "scrub.h"
61 #include "verity.h"
62 #include "super.h"
63 #include "extent-tree.h"
64 #define CREATE_TRACE_POINTS
65 #include <trace/events/btrfs.h>
66
67 static const struct super_operations btrfs_super_ops;
68
69 /*
70 * Types for mounting the default subvolume and a subvolume explicitly
71 * requested by subvol=/path. That way the callchain is straightforward and we
72 * don't have to play tricks with the mount options and recursive calls to
73 * btrfs_mount.
74 *
75 * The new btrfs_root_fs_type also servers as a tag for the bdev_holder.
76 */
77 static struct file_system_type btrfs_fs_type;
78 static struct file_system_type btrfs_root_fs_type;
79
80 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
81
82 static void btrfs_put_super(struct super_block *sb)
83 {
84 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
85
86 btrfs_info(fs_info, "last unmount of filesystem %pU", fs_info->fs_devices->fsid);
87 close_ctree(fs_info);
88 }
89
90 /* Store the mount options related information. */
91 struct btrfs_fs_context {
92 char *subvol_name;
93 u64 subvol_objectid;
94 u64 max_inline;
95 u32 commit_interval;
96 u32 metadata_ratio;
97 u32 thread_pool_size;
98 unsigned long mount_opt;
99 unsigned long compress_type:4;
100 unsigned int compress_level;
101 refcount_t refs;
102 };
103
/*
 * Identifiers for every recognized mount option.  The numeric values follow
 * declaration order, so entries must not be reordered.
 */
enum {
	Opt_acl,
	Opt_noacl,
	Opt_clear_cache,
	Opt_commit_interval,
	Opt_compress,
	Opt_compress_force,
	Opt_compress_force_type,
	Opt_compress_type,
	Opt_degraded,
	Opt_device,
	Opt_fatal_errors,
	Opt_flushoncommit,
	Opt_noflushoncommit,
	Opt_max_inline,
	Opt_barrier,
	Opt_nobarrier,
	Opt_datacow,
	Opt_nodatacow,
	Opt_datasum,
	Opt_nodatasum,
	Opt_defrag,
	Opt_nodefrag,
	Opt_discard,
	Opt_nodiscard,
	Opt_discard_mode,
	Opt_norecovery,
	Opt_ratio,
	Opt_rescan_uuid_tree,
	Opt_skip_balance,
	Opt_space_cache,
	Opt_no_space_cache,
	Opt_space_cache_version,
	Opt_ssd,
	Opt_nossd,
	Opt_ssd_spread,
	Opt_nossd_spread,
	Opt_subvol,
	Opt_subvol_empty,
	Opt_subvolid,
	Opt_thread_pool,
	Opt_treelog,
	Opt_notreelog,
	Opt_user_subvol_rm_allowed,

	/* Rescue options */
	Opt_rescue,
	Opt_usebackuproot,
	Opt_nologreplay,
	Opt_ignorebadroots,
	Opt_ignoredatacsums,
	Opt_rescue_all,

	/* Deprecated options */
	Opt_recovery,
	Opt_inode_cache,
	Opt_noinode_cache,

	/* Debugging options */
	Opt_enospc_debug,
	Opt_noenospc_debug,
#ifdef CONFIG_BTRFS_DEBUG
	Opt_fragment,
	Opt_fragment_data,
	Opt_fragment_metadata,
	Opt_fragment_all,
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
	Opt_ref_verify,
#endif
	/* Sentinel: no option matched. */
	Opt_err,
};
160
161 static const match_table_t tokens = {
162 {Opt_acl, "acl"},
163 {Opt_noacl, "noacl"},
164 {Opt_clear_cache, "clear_cache"},
165 {Opt_commit_interval, "commit=%u"},
166 {Opt_compress, "compress"},
167 {Opt_compress_type, "compress=%s"},
168 {Opt_compress_force, "compress-force"},
169 {Opt_compress_force_type, "compress-force=%s"},
170 {Opt_degraded, "degraded"},
171 {Opt_device, "device=%s"},
172 {Opt_fatal_errors, "fatal_errors=%s"},
173 {Opt_flushoncommit, "flushoncommit"},
174 {Opt_noflushoncommit, "noflushoncommit"},
175 {Opt_inode_cache, "inode_cache"},
176 {Opt_noinode_cache, "noinode_cache"},
177 {Opt_max_inline, "max_inline=%s"},
178 {Opt_barrier, "barrier"},
179 {Opt_nobarrier, "nobarrier"},
180 {Opt_datacow, "datacow"},
181 {Opt_nodatacow, "nodatacow"},
182 {Opt_datasum, "datasum"},
183 {Opt_nodatasum, "nodatasum"},
184 {Opt_defrag, "autodefrag"},
185 {Opt_nodefrag, "noautodefrag"},
186 {Opt_discard, "discard"},
187 {Opt_discard_mode, "discard=%s"},
188 {Opt_nodiscard, "nodiscard"},
189 {Opt_norecovery, "norecovery"},
190 {Opt_ratio, "metadata_ratio=%u"},
191 {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
192 {Opt_skip_balance, "skip_balance"},
193 {Opt_space_cache, "space_cache"},
194 {Opt_no_space_cache, "nospace_cache"},
195 {Opt_space_cache_version, "space_cache=%s"},
196 {Opt_ssd, "ssd"},
197 {Opt_nossd, "nossd"},
198 {Opt_ssd_spread, "ssd_spread"},
199 {Opt_nossd_spread, "nossd_spread"},
200 {Opt_subvol, "subvol=%s"},
201 {Opt_subvol_empty, "subvol="},
202 {Opt_subvolid, "subvolid=%s"},
203 {Opt_thread_pool, "thread_pool=%u"},
204 {Opt_treelog, "treelog"},
205 {Opt_notreelog, "notreelog"},
206 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
207
208 /* Rescue options */
209 {Opt_rescue, "rescue=%s"},
210 /* Deprecated, with alias rescue=nologreplay */
211 {Opt_nologreplay, "nologreplay"},
212 /* Deprecated, with alias rescue=usebackuproot */
213 {Opt_usebackuproot, "usebackuproot"},
214
215 /* Deprecated options */
216 {Opt_recovery, "recovery"},
217
218 /* Debugging options */
219 {Opt_enospc_debug, "enospc_debug"},
220 {Opt_noenospc_debug, "noenospc_debug"},
221 #ifdef CONFIG_BTRFS_DEBUG
222 {Opt_fragment_data, "fragment=data"},
223 {Opt_fragment_metadata, "fragment=metadata"},
224 {Opt_fragment_all, "fragment=all"},
225 #endif
226 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
227 {Opt_ref_verify, "ref_verify"},
228 #endif
229 {Opt_err, NULL},
230 };
231
232 static const match_table_t rescue_tokens = {
233 {Opt_usebackuproot, "usebackuproot"},
234 {Opt_nologreplay, "nologreplay"},
235 {Opt_ignorebadroots, "ignorebadroots"},
236 {Opt_ignorebadroots, "ibadroots"},
237 {Opt_ignoredatacsums, "ignoredatacsums"},
238 {Opt_ignoredatacsums, "idatacsums"},
239 {Opt_rescue_all, "all"},
240 {Opt_err, NULL},
241 };
242
243 enum {
244 Opt_fatal_errors_panic,
245 Opt_fatal_errors_bug,
246 };
247
248 static const struct constant_table btrfs_parameter_fatal_errors[] = {
249 { "panic", Opt_fatal_errors_panic },
250 { "bug", Opt_fatal_errors_bug },
251 {}
252 };
253
254 enum {
255 Opt_discard_sync,
256 Opt_discard_async,
257 };
258
259 static const struct constant_table btrfs_parameter_discard[] = {
260 { "sync", Opt_discard_sync },
261 { "async", Opt_discard_async },
262 {}
263 };
264
265 enum {
266 Opt_space_cache_v1,
267 Opt_space_cache_v2,
268 };
269
270 static const struct constant_table btrfs_parameter_space_cache[] = {
271 { "v1", Opt_space_cache_v1 },
272 { "v2", Opt_space_cache_v2 },
273 {}
274 };
275
276 enum {
277 Opt_rescue_usebackuproot,
278 Opt_rescue_nologreplay,
279 Opt_rescue_ignorebadroots,
280 Opt_rescue_ignoredatacsums,
281 Opt_rescue_parameter_all,
282 };
283
284 static const struct constant_table btrfs_parameter_rescue[] = {
285 { "usebackuproot", Opt_rescue_usebackuproot },
286 { "nologreplay", Opt_rescue_nologreplay },
287 { "ignorebadroots", Opt_rescue_ignorebadroots },
288 { "ibadroots", Opt_rescue_ignorebadroots },
289 { "ignoredatacsums", Opt_rescue_ignoredatacsums },
290 { "idatacsums", Opt_rescue_ignoredatacsums },
291 { "all", Opt_rescue_parameter_all },
292 {}
293 };
294
#ifdef CONFIG_BTRFS_DEBUG
/* Values accepted by the debug-only fragment= option. */
enum {
	Opt_fragment_parameter_data,
	Opt_fragment_parameter_metadata,
	Opt_fragment_parameter_all,
};

/* Maps each fragment= string to its enum value. */
static const struct constant_table btrfs_parameter_fragment[] = {
	{ .name = "data",	.value = Opt_fragment_parameter_data },
	{ .name = "metadata",	.value = Opt_fragment_parameter_metadata },
	{ .name = "all",	.value = Opt_fragment_parameter_all },
	{}
};
#endif
309
310 static const struct fs_parameter_spec btrfs_fs_parameters[] __maybe_unused = {
311 fsparam_flag_no("acl", Opt_acl),
312 fsparam_flag_no("autodefrag", Opt_defrag),
313 fsparam_flag_no("barrier", Opt_barrier),
314 fsparam_flag("clear_cache", Opt_clear_cache),
315 fsparam_u32("commit", Opt_commit_interval),
316 fsparam_flag("compress", Opt_compress),
317 fsparam_string("compress", Opt_compress_type),
318 fsparam_flag("compress-force", Opt_compress_force),
319 fsparam_string("compress-force", Opt_compress_force_type),
320 fsparam_flag_no("datacow", Opt_datacow),
321 fsparam_flag_no("datasum", Opt_datasum),
322 fsparam_flag("degraded", Opt_degraded),
323 fsparam_string("device", Opt_device),
324 fsparam_flag_no("discard", Opt_discard),
325 fsparam_enum("discard", Opt_discard_mode, btrfs_parameter_discard),
326 fsparam_enum("fatal_errors", Opt_fatal_errors, btrfs_parameter_fatal_errors),
327 fsparam_flag_no("flushoncommit", Opt_flushoncommit),
328 fsparam_flag_no("inode_cache", Opt_inode_cache),
329 fsparam_string("max_inline", Opt_max_inline),
330 fsparam_u32("metadata_ratio", Opt_ratio),
331 fsparam_flag("rescan_uuid_tree", Opt_rescan_uuid_tree),
332 fsparam_flag("skip_balance", Opt_skip_balance),
333 fsparam_flag_no("space_cache", Opt_space_cache),
334 fsparam_enum("space_cache", Opt_space_cache_version, btrfs_parameter_space_cache),
335 fsparam_flag_no("ssd", Opt_ssd),
336 fsparam_flag_no("ssd_spread", Opt_ssd_spread),
337 fsparam_string("subvol", Opt_subvol),
338 fsparam_flag("subvol=", Opt_subvol_empty),
339 fsparam_u64("subvolid", Opt_subvolid),
340 fsparam_u32("thread_pool", Opt_thread_pool),
341 fsparam_flag_no("treelog", Opt_treelog),
342 fsparam_flag("user_subvol_rm_allowed", Opt_user_subvol_rm_allowed),
343
344 /* Rescue options. */
345 fsparam_enum("rescue", Opt_rescue, btrfs_parameter_rescue),
346 /* Deprecated, with alias rescue=nologreplay */
347 __fsparam(NULL, "nologreplay", Opt_nologreplay, fs_param_deprecated, NULL),
348 /* Deprecated, with alias rescue=usebackuproot */
349 __fsparam(NULL, "usebackuproot", Opt_usebackuproot, fs_param_deprecated, NULL),
350
351 /* Deprecated options. */
352 __fsparam(NULL, "recovery", Opt_recovery,
353 fs_param_neg_with_no | fs_param_deprecated, NULL),
354
355 /* Debugging options. */
356 fsparam_flag_no("enospc_debug", Opt_enospc_debug),
357 #ifdef CONFIG_BTRFS_DEBUG
358 fsparam_enum("fragment", Opt_fragment, btrfs_parameter_fragment),
359 #endif
360 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
361 fsparam_flag("ref_verify", Opt_ref_verify),
362 #endif
363 {}
364 };
365
366 static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
367 {
368 struct btrfs_fs_context *ctx = fc->fs_private;
369 struct fs_parse_result result;
370 int opt;
371
372 opt = fs_parse(fc, btrfs_fs_parameters, param, &result);
373 if (opt < 0)
374 return opt;
375
376 switch (opt) {
377 case Opt_degraded:
378 btrfs_set_opt(ctx->mount_opt, DEGRADED);
379 break;
380 case Opt_subvol_empty:
381 /*
382 * This exists because we used to allow it on accident, so we're
383 * keeping it to maintain ABI. See 37becec95ac3 ("Btrfs: allow
384 * empty subvol= again").
385 */
386 break;
387 case Opt_subvol:
388 kfree(ctx->subvol_name);
389 ctx->subvol_name = kstrdup(param->string, GFP_KERNEL);
390 if (!ctx->subvol_name)
391 return -ENOMEM;
392 break;
393 case Opt_subvolid:
394 ctx->subvol_objectid = result.uint_64;
395
396 /* subvolid=0 means give me the original fs_tree. */
397 if (!ctx->subvol_objectid)
398 ctx->subvol_objectid = BTRFS_FS_TREE_OBJECTID;
399 break;
400 case Opt_device: {
401 struct btrfs_device *device;
402 blk_mode_t mode = sb_open_mode(fc->sb_flags);
403
404 mutex_lock(&uuid_mutex);
405 device = btrfs_scan_one_device(param->string, mode, false);
406 mutex_unlock(&uuid_mutex);
407 if (IS_ERR(device))
408 return PTR_ERR(device);
409 break;
410 }
411 case Opt_datasum:
412 if (result.negated) {
413 btrfs_set_opt(ctx->mount_opt, NODATASUM);
414 } else {
415 btrfs_clear_opt(ctx->mount_opt, NODATACOW);
416 btrfs_clear_opt(ctx->mount_opt, NODATASUM);
417 }
418 break;
419 case Opt_datacow:
420 if (result.negated) {
421 btrfs_clear_opt(ctx->mount_opt, COMPRESS);
422 btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS);
423 btrfs_set_opt(ctx->mount_opt, NODATACOW);
424 btrfs_set_opt(ctx->mount_opt, NODATASUM);
425 } else {
426 btrfs_clear_opt(ctx->mount_opt, NODATACOW);
427 }
428 break;
429 case Opt_compress_force:
430 case Opt_compress_force_type:
431 btrfs_set_opt(ctx->mount_opt, FORCE_COMPRESS);
432 fallthrough;
433 case Opt_compress:
434 case Opt_compress_type:
435 if (opt == Opt_compress || opt == Opt_compress_force) {
436 ctx->compress_type = BTRFS_COMPRESS_ZLIB;
437 ctx->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
438 btrfs_set_opt(ctx->mount_opt, COMPRESS);
439 btrfs_clear_opt(ctx->mount_opt, NODATACOW);
440 btrfs_clear_opt(ctx->mount_opt, NODATASUM);
441 } else if (strncmp(param->string, "zlib", 4) == 0) {
442 ctx->compress_type = BTRFS_COMPRESS_ZLIB;
443 ctx->compress_level =
444 btrfs_compress_str2level(BTRFS_COMPRESS_ZLIB,
445 param->string + 4);
446 btrfs_set_opt(ctx->mount_opt, COMPRESS);
447 btrfs_clear_opt(ctx->mount_opt, NODATACOW);
448 btrfs_clear_opt(ctx->mount_opt, NODATASUM);
449 } else if (strncmp(param->string, "lzo", 3) == 0) {
450 ctx->compress_type = BTRFS_COMPRESS_LZO;
451 ctx->compress_level = 0;
452 btrfs_set_opt(ctx->mount_opt, COMPRESS);
453 btrfs_clear_opt(ctx->mount_opt, NODATACOW);
454 btrfs_clear_opt(ctx->mount_opt, NODATASUM);
455 } else if (strncmp(param->string, "zstd", 4) == 0) {
456 ctx->compress_type = BTRFS_COMPRESS_ZSTD;
457 ctx->compress_level =
458 btrfs_compress_str2level(BTRFS_COMPRESS_ZSTD,
459 param->string + 4);
460 btrfs_set_opt(ctx->mount_opt, COMPRESS);
461 btrfs_clear_opt(ctx->mount_opt, NODATACOW);
462 btrfs_clear_opt(ctx->mount_opt, NODATASUM);
463 } else if (strncmp(param->string, "no", 2) == 0) {
464 ctx->compress_level = 0;
465 ctx->compress_type = 0;
466 btrfs_clear_opt(ctx->mount_opt, COMPRESS);
467 btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS);
468 } else {
469 btrfs_err(NULL, "unrecognized compression value %s",
470 param->string);
471 return -EINVAL;
472 }
473 break;
474 case Opt_ssd:
475 if (result.negated) {
476 btrfs_set_opt(ctx->mount_opt, NOSSD);
477 btrfs_clear_opt(ctx->mount_opt, SSD);
478 btrfs_clear_opt(ctx->mount_opt, SSD_SPREAD);
479 } else {
480 btrfs_set_opt(ctx->mount_opt, SSD);
481 btrfs_clear_opt(ctx->mount_opt, NOSSD);
482 }
483 break;
484 case Opt_ssd_spread:
485 if (result.negated) {
486 btrfs_clear_opt(ctx->mount_opt, SSD_SPREAD);
487 } else {
488 btrfs_set_opt(ctx->mount_opt, SSD);
489 btrfs_set_opt(ctx->mount_opt, SSD_SPREAD);
490 btrfs_clear_opt(ctx->mount_opt, NOSSD);
491 }
492 break;
493 case Opt_barrier:
494 if (result.negated)
495 btrfs_set_opt(ctx->mount_opt, NOBARRIER);
496 else
497 btrfs_clear_opt(ctx->mount_opt, NOBARRIER);
498 break;
499 case Opt_thread_pool:
500 if (result.uint_32 == 0) {
501 btrfs_err(NULL, "invalid value 0 for thread_pool");
502 return -EINVAL;
503 }
504 ctx->thread_pool_size = result.uint_32;
505 break;
506 case Opt_max_inline:
507 ctx->max_inline = memparse(param->string, NULL);
508 break;
509 case Opt_acl:
510 if (result.negated) {
511 fc->sb_flags &= ~SB_POSIXACL;
512 } else {
513 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
514 fc->sb_flags |= SB_POSIXACL;
515 #else
516 btrfs_err(NULL, "support for ACL not compiled in");
517 return -EINVAL;
518 #endif
519 }
520 /*
521 * VFS limits the ability to toggle ACL on and off via remount,
522 * despite every file system allowing this. This seems to be
523 * an oversight since we all do, but it'll fail if we're
524 * remounting. So don't set the mask here, we'll check it in
525 * btrfs_reconfigure and do the toggling ourselves.
526 */
527 if (fc->purpose != FS_CONTEXT_FOR_RECONFIGURE)
528 fc->sb_flags_mask |= SB_POSIXACL;
529 break;
530 case Opt_treelog:
531 if (result.negated)
532 btrfs_set_opt(ctx->mount_opt, NOTREELOG);
533 else
534 btrfs_clear_opt(ctx->mount_opt, NOTREELOG);
535 break;
536 case Opt_recovery:
537 /*
538 * -o recovery used to be an alias for usebackuproot, and then
539 * norecovery was an alias for nologreplay, hence the different
540 * behaviors for negated and not.
541 */
542 if (result.negated) {
543 btrfs_warn(NULL,
544 "'norecovery' is deprecated, use 'rescue=nologreplay' instead");
545 btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY);
546 } else {
547 btrfs_warn(NULL,
548 "'recovery' is deprecated, use 'rescue=usebackuproot' instead");
549 btrfs_set_opt(ctx->mount_opt, USEBACKUPROOT);
550 }
551 break;
552 case Opt_nologreplay:
553 btrfs_warn(NULL,
554 "'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
555 btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY);
556 break;
557 case Opt_flushoncommit:
558 if (result.negated)
559 btrfs_clear_opt(ctx->mount_opt, FLUSHONCOMMIT);
560 else
561 btrfs_set_opt(ctx->mount_opt, FLUSHONCOMMIT);
562 break;
563 case Opt_ratio:
564 ctx->metadata_ratio = result.uint_32;
565 break;
566 case Opt_discard:
567 if (result.negated) {
568 btrfs_clear_opt(ctx->mount_opt, DISCARD_SYNC);
569 btrfs_clear_opt(ctx->mount_opt, DISCARD_ASYNC);
570 btrfs_set_opt(ctx->mount_opt, NODISCARD);
571 } else {
572 btrfs_set_opt(ctx->mount_opt, DISCARD_SYNC);
573 btrfs_clear_opt(ctx->mount_opt, DISCARD_ASYNC);
574 }
575 break;
576 case Opt_discard_mode:
577 switch (result.uint_32) {
578 case Opt_discard_sync:
579 btrfs_clear_opt(ctx->mount_opt, DISCARD_ASYNC);
580 btrfs_set_opt(ctx->mount_opt, DISCARD_SYNC);
581 break;
582 case Opt_discard_async:
583 btrfs_clear_opt(ctx->mount_opt, DISCARD_SYNC);
584 btrfs_set_opt(ctx->mount_opt, DISCARD_ASYNC);
585 break;
586 default:
587 btrfs_err(NULL, "unrecognized discard mode value %s",
588 param->key);
589 return -EINVAL;
590 }
591 btrfs_clear_opt(ctx->mount_opt, NODISCARD);
592 break;
593 case Opt_space_cache:
594 if (result.negated) {
595 btrfs_set_opt(ctx->mount_opt, NOSPACECACHE);
596 btrfs_clear_opt(ctx->mount_opt, SPACE_CACHE);
597 btrfs_clear_opt(ctx->mount_opt, FREE_SPACE_TREE);
598 } else {
599 btrfs_clear_opt(ctx->mount_opt, FREE_SPACE_TREE);
600 btrfs_set_opt(ctx->mount_opt, SPACE_CACHE);
601 }
602 break;
603 case Opt_space_cache_version:
604 switch (result.uint_32) {
605 case Opt_space_cache_v1:
606 btrfs_set_opt(ctx->mount_opt, SPACE_CACHE);
607 btrfs_clear_opt(ctx->mount_opt, FREE_SPACE_TREE);
608 break;
609 case Opt_space_cache_v2:
610 btrfs_clear_opt(ctx->mount_opt, SPACE_CACHE);
611 btrfs_set_opt(ctx->mount_opt, FREE_SPACE_TREE);
612 break;
613 default:
614 btrfs_err(NULL, "unrecognized space_cache value %s",
615 param->key);
616 return -EINVAL;
617 }
618 break;
619 case Opt_rescan_uuid_tree:
620 btrfs_set_opt(ctx->mount_opt, RESCAN_UUID_TREE);
621 break;
622 case Opt_inode_cache:
623 btrfs_warn(NULL,
624 "the 'inode_cache' option is deprecated and has no effect since 5.11");
625 break;
626 case Opt_clear_cache:
627 btrfs_set_opt(ctx->mount_opt, CLEAR_CACHE);
628 break;
629 case Opt_user_subvol_rm_allowed:
630 btrfs_set_opt(ctx->mount_opt, USER_SUBVOL_RM_ALLOWED);
631 break;
632 case Opt_enospc_debug:
633 if (result.negated)
634 btrfs_clear_opt(ctx->mount_opt, ENOSPC_DEBUG);
635 else
636 btrfs_set_opt(ctx->mount_opt, ENOSPC_DEBUG);
637 break;
638 case Opt_defrag:
639 if (result.negated)
640 btrfs_clear_opt(ctx->mount_opt, AUTO_DEFRAG);
641 else
642 btrfs_set_opt(ctx->mount_opt, AUTO_DEFRAG);
643 break;
644 case Opt_usebackuproot:
645 btrfs_warn(NULL,
646 "'usebackuproot' is deprecated, use 'rescue=usebackuproot' instead");
647 btrfs_set_opt(ctx->mount_opt, USEBACKUPROOT);
648 break;
649 case Opt_skip_balance:
650 btrfs_set_opt(ctx->mount_opt, SKIP_BALANCE);
651 break;
652 case Opt_fatal_errors:
653 switch (result.uint_32) {
654 case Opt_fatal_errors_panic:
655 btrfs_set_opt(ctx->mount_opt, PANIC_ON_FATAL_ERROR);
656 break;
657 case Opt_fatal_errors_bug:
658 btrfs_clear_opt(ctx->mount_opt, PANIC_ON_FATAL_ERROR);
659 break;
660 default:
661 btrfs_err(NULL, "unrecognized fatal_errors value %s",
662 param->key);
663 return -EINVAL;
664 }
665 break;
666 case Opt_commit_interval:
667 ctx->commit_interval = result.uint_32;
668 if (ctx->commit_interval == 0)
669 ctx->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
670 break;
671 case Opt_rescue:
672 switch (result.uint_32) {
673 case Opt_rescue_usebackuproot:
674 btrfs_set_opt(ctx->mount_opt, USEBACKUPROOT);
675 break;
676 case Opt_rescue_nologreplay:
677 btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY);
678 break;
679 case Opt_rescue_ignorebadroots:
680 btrfs_set_opt(ctx->mount_opt, IGNOREBADROOTS);
681 break;
682 case Opt_rescue_ignoredatacsums:
683 btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS);
684 break;
685 case Opt_rescue_parameter_all:
686 btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS);
687 btrfs_set_opt(ctx->mount_opt, IGNOREBADROOTS);
688 btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY);
689 break;
690 default:
691 btrfs_info(NULL, "unrecognized rescue option '%s'",
692 param->key);
693 return -EINVAL;
694 }
695 break;
696 #ifdef CONFIG_BTRFS_DEBUG
697 case Opt_fragment:
698 switch (result.uint_32) {
699 case Opt_fragment_parameter_all:
700 btrfs_set_opt(ctx->mount_opt, FRAGMENT_DATA);
701 btrfs_set_opt(ctx->mount_opt, FRAGMENT_METADATA);
702 break;
703 case Opt_fragment_parameter_metadata:
704 btrfs_set_opt(ctx->mount_opt, FRAGMENT_METADATA);
705 break;
706 case Opt_fragment_parameter_data:
707 btrfs_set_opt(ctx->mount_opt, FRAGMENT_DATA);
708 break;
709 default:
710 btrfs_info(NULL, "unrecognized fragment option '%s'",
711 param->key);
712 return -EINVAL;
713 }
714 break;
715 #endif
716 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
717 case Opt_ref_verify:
718 btrfs_set_opt(ctx->mount_opt, REF_VERIFY);
719 break;
720 #endif
721 default:
722 btrfs_err(NULL, "unrecognized mount option '%s'", param->key);
723 return -EINVAL;
724 }
725
726 return 0;
727 }
728
729 static bool check_ro_option(struct btrfs_fs_info *fs_info,
730 unsigned long mount_opt, unsigned long opt,
731 const char *opt_name)
732 {
733 if (mount_opt & opt) {
734 btrfs_err(fs_info, "%s must be used with ro mount option",
735 opt_name);
736 return true;
737 }
738 return false;
739 }
740
741 static bool check_options(struct btrfs_fs_info *info, unsigned long *mount_opt,
742 unsigned long flags)
743 {
744 bool ret = true;
745
746 if (!(flags & SB_RDONLY) &&
747 (check_ro_option(info, *mount_opt, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") ||
748 check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") ||
749 check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums")))
750 ret = false;
751
752 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
753 !btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE) &&
754 !btrfs_raw_test_opt(*mount_opt, CLEAR_CACHE)) {
755 btrfs_err(info, "cannot disable free-space-tree");
756 ret = false;
757 }
758 if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE) &&
759 !btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) {
760 btrfs_err(info, "cannot disable free-space-tree with block-group-tree feature");
761 ret = false;
762 }
763
764 if (btrfs_check_mountopts_zoned(info, mount_opt))
765 ret = false;
766
767 if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) {
768 if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE))
769 btrfs_info(info, "disk space caching is enabled");
770 if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE))
771 btrfs_info(info, "using free-space-tree");
772 }
773
774 return ret;
775 }
776
777 /*
778 * This is subtle, we only call this during open_ctree(). We need to pre-load
779 * the mount options with the on-disk settings. Before the new mount API took
780 * effect we would do this on mount and remount. With the new mount API we'll
781 * only do this on the initial mount.
782 *
783 * This isn't a change in behavior, because we're using the current state of the
784 * file system to set the current mount options. If you mounted with special
785 * options to disable these features and then remounted we wouldn't revert the
786 * settings, because mounting without these features cleared the on-disk
787 * settings, so this being called on re-mount is not needed.
788 */
789 void btrfs_set_free_space_cache_settings(struct btrfs_fs_info *fs_info)
790 {
791 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
792 btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE);
793 else if (btrfs_free_space_cache_v1_active(fs_info)) {
794 if (btrfs_is_zoned(fs_info)) {
795 btrfs_info(fs_info,
796 "zoned: clearing existing space cache");
797 btrfs_set_super_cache_generation(fs_info->super_copy, 0);
798 } else {
799 btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE);
800 }
801 }
802
803 if (fs_info->sectorsize < PAGE_SIZE) {
804 btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
805 if (!btrfs_test_opt(fs_info, FREE_SPACE_TREE)) {
806 btrfs_info(fs_info,
807 "forcing free space tree for sector size %u with page size %lu",
808 fs_info->sectorsize, PAGE_SIZE);
809 btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE);
810 }
811 }
812 }
813
814 static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
815 {
816 char *opts;
817 char *orig;
818 char *p;
819 substring_t args[MAX_OPT_ARGS];
820 int ret = 0;
821
822 opts = kstrdup(options, GFP_KERNEL);
823 if (!opts)
824 return -ENOMEM;
825 orig = opts;
826
827 while ((p = strsep(&opts, ":")) != NULL) {
828 int token;
829
830 if (!*p)
831 continue;
832 token = match_token(p, rescue_tokens, args);
833 switch (token){
834 case Opt_usebackuproot:
835 btrfs_info(info,
836 "trying to use backup root at mount time");
837 btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
838 break;
839 case Opt_nologreplay:
840 btrfs_set_and_info(info, NOLOGREPLAY,
841 "disabling log replay at mount time");
842 break;
843 case Opt_ignorebadroots:
844 btrfs_set_and_info(info, IGNOREBADROOTS,
845 "ignoring bad roots");
846 break;
847 case Opt_ignoredatacsums:
848 btrfs_set_and_info(info, IGNOREDATACSUMS,
849 "ignoring data csums");
850 break;
851 case Opt_rescue_all:
852 btrfs_info(info, "enabling all of the rescue options");
853 btrfs_set_and_info(info, IGNOREDATACSUMS,
854 "ignoring data csums");
855 btrfs_set_and_info(info, IGNOREBADROOTS,
856 "ignoring bad roots");
857 btrfs_set_and_info(info, NOLOGREPLAY,
858 "disabling log replay at mount time");
859 break;
860 case Opt_err:
861 btrfs_info(info, "unrecognized rescue option '%s'", p);
862 ret = -EINVAL;
863 goto out;
864 default:
865 break;
866 }
867
868 }
869 out:
870 kfree(orig);
871 return ret;
872 }
873
874 /*
875 * Regular mount options parser. Everything that is needed only when
876 * reading in a new superblock is parsed here.
877 * XXX JDM: This needs to be cleaned up for remount.
878 */
879 int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
880 unsigned long new_flags)
881 {
882 substring_t args[MAX_OPT_ARGS];
883 char *p, *num;
884 int intarg;
885 int ret = 0;
886 char *compress_type;
887 bool compress_force = false;
888 enum btrfs_compression_type saved_compress_type;
889 int saved_compress_level;
890 bool saved_compress_force;
891 int no_compress = 0;
892
893 /*
894 * Even the options are empty, we still need to do extra check
895 * against new flags
896 */
897 if (!options)
898 goto out;
899
900 while ((p = strsep(&options, ",")) != NULL) {
901 int token;
902 if (!*p)
903 continue;
904
905 token = match_token(p, tokens, args);
906 switch (token) {
907 case Opt_degraded:
908 btrfs_info(info, "allowing degraded mounts");
909 btrfs_set_opt(info->mount_opt, DEGRADED);
910 break;
911 case Opt_subvol:
912 case Opt_subvol_empty:
913 case Opt_subvolid:
914 case Opt_device:
915 /*
916 * These are parsed by btrfs_parse_subvol_options or
917 * btrfs_parse_device_options and can be ignored here.
918 */
919 break;
920 case Opt_nodatasum:
921 btrfs_set_and_info(info, NODATASUM,
922 "setting nodatasum");
923 break;
924 case Opt_datasum:
925 if (btrfs_test_opt(info, NODATASUM)) {
926 if (btrfs_test_opt(info, NODATACOW))
927 btrfs_info(info,
928 "setting datasum, datacow enabled");
929 else
930 btrfs_info(info, "setting datasum");
931 }
932 btrfs_clear_opt(info->mount_opt, NODATACOW);
933 btrfs_clear_opt(info->mount_opt, NODATASUM);
934 break;
935 case Opt_nodatacow:
936 if (!btrfs_test_opt(info, NODATACOW)) {
937 if (!btrfs_test_opt(info, COMPRESS) ||
938 !btrfs_test_opt(info, FORCE_COMPRESS)) {
939 btrfs_info(info,
940 "setting nodatacow, compression disabled");
941 } else {
942 btrfs_info(info, "setting nodatacow");
943 }
944 }
945 btrfs_clear_opt(info->mount_opt, COMPRESS);
946 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
947 btrfs_set_opt(info->mount_opt, NODATACOW);
948 btrfs_set_opt(info->mount_opt, NODATASUM);
949 break;
950 case Opt_datacow:
951 btrfs_clear_and_info(info, NODATACOW,
952 "setting datacow");
953 break;
954 case Opt_compress_force:
955 case Opt_compress_force_type:
956 compress_force = true;
957 fallthrough;
958 case Opt_compress:
959 case Opt_compress_type:
960 saved_compress_type = btrfs_test_opt(info,
961 COMPRESS) ?
962 info->compress_type : BTRFS_COMPRESS_NONE;
963 saved_compress_force =
964 btrfs_test_opt(info, FORCE_COMPRESS);
965 saved_compress_level = info->compress_level;
966 if (token == Opt_compress ||
967 token == Opt_compress_force ||
968 strncmp(args[0].from, "zlib", 4) == 0) {
969 compress_type = "zlib";
970
971 info->compress_type = BTRFS_COMPRESS_ZLIB;
972 info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
973 /*
974 * args[0] contains uninitialized data since
975 * for these tokens we don't expect any
976 * parameter.
977 */
978 if (token != Opt_compress &&
979 token != Opt_compress_force)
980 info->compress_level =
981 btrfs_compress_str2level(
982 BTRFS_COMPRESS_ZLIB,
983 args[0].from + 4);
984 btrfs_set_opt(info->mount_opt, COMPRESS);
985 btrfs_clear_opt(info->mount_opt, NODATACOW);
986 btrfs_clear_opt(info->mount_opt, NODATASUM);
987 no_compress = 0;
988 } else if (strncmp(args[0].from, "lzo", 3) == 0) {
989 compress_type = "lzo";
990 info->compress_type = BTRFS_COMPRESS_LZO;
991 info->compress_level = 0;
992 btrfs_set_opt(info->mount_opt, COMPRESS);
993 btrfs_clear_opt(info->mount_opt, NODATACOW);
994 btrfs_clear_opt(info->mount_opt, NODATASUM);
995 btrfs_set_fs_incompat(info, COMPRESS_LZO);
996 no_compress = 0;
997 } else if (strncmp(args[0].from, "zstd", 4) == 0) {
998 compress_type = "zstd";
999 info->compress_type = BTRFS_COMPRESS_ZSTD;
1000 info->compress_level =
1001 btrfs_compress_str2level(
1002 BTRFS_COMPRESS_ZSTD,
1003 args[0].from + 4);
1004 btrfs_set_opt(info->mount_opt, COMPRESS);
1005 btrfs_clear_opt(info->mount_opt, NODATACOW);
1006 btrfs_clear_opt(info->mount_opt, NODATASUM);
1007 btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
1008 no_compress = 0;
1009 } else if (strncmp(args[0].from, "no", 2) == 0) {
1010 compress_type = "no";
1011 info->compress_level = 0;
1012 info->compress_type = 0;
1013 btrfs_clear_opt(info->mount_opt, COMPRESS);
1014 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
1015 compress_force = false;
1016 no_compress++;
1017 } else {
1018 btrfs_err(info, "unrecognized compression value %s",
1019 args[0].from);
1020 ret = -EINVAL;
1021 goto out;
1022 }
1023
1024 if (compress_force) {
1025 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
1026 } else {
1027 /*
1028 * If we remount from compress-force=xxx to
1029 * compress=xxx, we need clear FORCE_COMPRESS
1030 * flag, otherwise, there is no way for users
1031 * to disable forcible compression separately.
1032 */
1033 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
1034 }
1035 if (no_compress == 1) {
1036 btrfs_info(info, "use no compression");
1037 } else if ((info->compress_type != saved_compress_type) ||
1038 (compress_force != saved_compress_force) ||
1039 (info->compress_level != saved_compress_level)) {
1040 btrfs_info(info, "%s %s compression, level %d",
1041 (compress_force) ? "force" : "use",
1042 compress_type, info->compress_level);
1043 }
1044 compress_force = false;
1045 break;
1046 case Opt_ssd:
1047 btrfs_set_and_info(info, SSD,
1048 "enabling ssd optimizations");
1049 btrfs_clear_opt(info->mount_opt, NOSSD);
1050 break;
1051 case Opt_ssd_spread:
1052 btrfs_set_and_info(info, SSD,
1053 "enabling ssd optimizations");
1054 btrfs_set_and_info(info, SSD_SPREAD,
1055 "using spread ssd allocation scheme");
1056 btrfs_clear_opt(info->mount_opt, NOSSD);
1057 break;
1058 case Opt_nossd:
1059 btrfs_set_opt(info->mount_opt, NOSSD);
1060 btrfs_clear_and_info(info, SSD,
1061 "not using ssd optimizations");
1062 fallthrough;
1063 case Opt_nossd_spread:
1064 btrfs_clear_and_info(info, SSD_SPREAD,
1065 "not using spread ssd allocation scheme");
1066 break;
1067 case Opt_barrier:
1068 btrfs_clear_and_info(info, NOBARRIER,
1069 "turning on barriers");
1070 break;
1071 case Opt_nobarrier:
1072 btrfs_set_and_info(info, NOBARRIER,
1073 "turning off barriers");
1074 break;
1075 case Opt_thread_pool:
1076 ret = match_int(&args[0], &intarg);
1077 if (ret) {
1078 btrfs_err(info, "unrecognized thread_pool value %s",
1079 args[0].from);
1080 goto out;
1081 } else if (intarg == 0) {
1082 btrfs_err(info, "invalid value 0 for thread_pool");
1083 ret = -EINVAL;
1084 goto out;
1085 }
1086 info->thread_pool_size = intarg;
1087 break;
1088 case Opt_max_inline:
1089 num = match_strdup(&args[0]);
1090 if (num) {
1091 info->max_inline = memparse(num, NULL);
1092 kfree(num);
1093
1094 if (info->max_inline) {
1095 info->max_inline = min_t(u64,
1096 info->max_inline,
1097 info->sectorsize);
1098 }
1099 btrfs_info(info, "max_inline at %llu",
1100 info->max_inline);
1101 } else {
1102 ret = -ENOMEM;
1103 goto out;
1104 }
1105 break;
1106 case Opt_acl:
1107 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
1108 info->sb->s_flags |= SB_POSIXACL;
1109 break;
1110 #else
1111 btrfs_err(info, "support for ACL not compiled in!");
1112 ret = -EINVAL;
1113 goto out;
1114 #endif
1115 case Opt_noacl:
1116 info->sb->s_flags &= ~SB_POSIXACL;
1117 break;
1118 case Opt_notreelog:
1119 btrfs_set_and_info(info, NOTREELOG,
1120 "disabling tree log");
1121 break;
1122 case Opt_treelog:
1123 btrfs_clear_and_info(info, NOTREELOG,
1124 "enabling tree log");
1125 break;
1126 case Opt_norecovery:
1127 case Opt_nologreplay:
1128 btrfs_warn(info,
1129 "'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
1130 btrfs_set_and_info(info, NOLOGREPLAY,
1131 "disabling log replay at mount time");
1132 break;
1133 case Opt_flushoncommit:
1134 btrfs_set_and_info(info, FLUSHONCOMMIT,
1135 "turning on flush-on-commit");
1136 break;
1137 case Opt_noflushoncommit:
1138 btrfs_clear_and_info(info, FLUSHONCOMMIT,
1139 "turning off flush-on-commit");
1140 break;
1141 case Opt_ratio:
1142 ret = match_int(&args[0], &intarg);
1143 if (ret) {
1144 btrfs_err(info, "unrecognized metadata_ratio value %s",
1145 args[0].from);
1146 goto out;
1147 }
1148 info->metadata_ratio = intarg;
1149 btrfs_info(info, "metadata ratio %u",
1150 info->metadata_ratio);
1151 break;
1152 case Opt_discard:
1153 case Opt_discard_mode:
1154 if (token == Opt_discard ||
1155 strcmp(args[0].from, "sync") == 0) {
1156 btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC);
1157 btrfs_set_and_info(info, DISCARD_SYNC,
1158 "turning on sync discard");
1159 } else if (strcmp(args[0].from, "async") == 0) {
1160 btrfs_clear_opt(info->mount_opt, DISCARD_SYNC);
1161 btrfs_set_and_info(info, DISCARD_ASYNC,
1162 "turning on async discard");
1163 } else {
1164 btrfs_err(info, "unrecognized discard mode value %s",
1165 args[0].from);
1166 ret = -EINVAL;
1167 goto out;
1168 }
1169 btrfs_clear_opt(info->mount_opt, NODISCARD);
1170 break;
1171 case Opt_nodiscard:
1172 btrfs_clear_and_info(info, DISCARD_SYNC,
1173 "turning off discard");
1174 btrfs_clear_and_info(info, DISCARD_ASYNC,
1175 "turning off async discard");
1176 btrfs_set_opt(info->mount_opt, NODISCARD);
1177 break;
1178 case Opt_space_cache:
1179 case Opt_space_cache_version:
1180 /*
1181 * We already set FREE_SPACE_TREE above because we have
1182 * compat_ro(FREE_SPACE_TREE) set, and we aren't going
1183 * to allow v1 to be set for extent tree v2, simply
1184 * ignore this setting if we're extent tree v2.
1185 *
1186 * For subpage blocksize we don't allow space cache v1,
1187 * and we'll turn on v2, so we can skip the settings
1188 * here as well.
1189 */
1190 if (btrfs_fs_incompat(info, EXTENT_TREE_V2) ||
1191 info->sectorsize < PAGE_SIZE)
1192 break;
1193 if (token == Opt_space_cache ||
1194 strcmp(args[0].from, "v1") == 0) {
1195 btrfs_clear_opt(info->mount_opt,
1196 FREE_SPACE_TREE);
1197 btrfs_set_and_info(info, SPACE_CACHE,
1198 "enabling disk space caching");
1199 } else if (strcmp(args[0].from, "v2") == 0) {
1200 btrfs_clear_opt(info->mount_opt,
1201 SPACE_CACHE);
1202 btrfs_set_and_info(info, FREE_SPACE_TREE,
1203 "enabling free space tree");
1204 } else {
1205 btrfs_err(info, "unrecognized space_cache value %s",
1206 args[0].from);
1207 ret = -EINVAL;
1208 goto out;
1209 }
1210 break;
1211 case Opt_rescan_uuid_tree:
1212 btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
1213 break;
1214 case Opt_no_space_cache:
1215 /*
1216 * We cannot operate without the free space tree with
1217 * extent tree v2, ignore this option.
1218 */
1219 if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
1220 break;
1221 if (btrfs_test_opt(info, SPACE_CACHE)) {
1222 btrfs_clear_and_info(info, SPACE_CACHE,
1223 "disabling disk space caching");
1224 }
1225 if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
1226 btrfs_clear_and_info(info, FREE_SPACE_TREE,
1227 "disabling free space tree");
1228 }
1229 break;
1230 case Opt_inode_cache:
1231 case Opt_noinode_cache:
1232 btrfs_warn(info,
1233 "the 'inode_cache' option is deprecated and has no effect since 5.11");
1234 break;
1235 case Opt_clear_cache:
1236 /*
1237 * We cannot clear the free space tree with extent tree
1238 * v2, ignore this option.
1239 */
1240 if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
1241 break;
1242 btrfs_set_and_info(info, CLEAR_CACHE,
1243 "force clearing of disk cache");
1244 break;
1245 case Opt_user_subvol_rm_allowed:
1246 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
1247 break;
1248 case Opt_enospc_debug:
1249 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
1250 break;
1251 case Opt_noenospc_debug:
1252 btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
1253 break;
1254 case Opt_defrag:
1255 btrfs_set_and_info(info, AUTO_DEFRAG,
1256 "enabling auto defrag");
1257 break;
1258 case Opt_nodefrag:
1259 btrfs_clear_and_info(info, AUTO_DEFRAG,
1260 "disabling auto defrag");
1261 break;
1262 case Opt_recovery:
1263 case Opt_usebackuproot:
1264 btrfs_warn(info,
1265 "'%s' is deprecated, use 'rescue=usebackuproot' instead",
1266 token == Opt_recovery ? "recovery" :
1267 "usebackuproot");
1268 btrfs_info(info,
1269 "trying to use backup root at mount time");
1270 btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
1271 break;
1272 case Opt_skip_balance:
1273 btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
1274 break;
1275 case Opt_fatal_errors:
1276 if (strcmp(args[0].from, "panic") == 0) {
1277 btrfs_set_opt(info->mount_opt,
1278 PANIC_ON_FATAL_ERROR);
1279 } else if (strcmp(args[0].from, "bug") == 0) {
1280 btrfs_clear_opt(info->mount_opt,
1281 PANIC_ON_FATAL_ERROR);
1282 } else {
1283 btrfs_err(info, "unrecognized fatal_errors value %s",
1284 args[0].from);
1285 ret = -EINVAL;
1286 goto out;
1287 }
1288 break;
1289 case Opt_commit_interval:
1290 intarg = 0;
1291 ret = match_int(&args[0], &intarg);
1292 if (ret) {
1293 btrfs_err(info, "unrecognized commit_interval value %s",
1294 args[0].from);
1295 ret = -EINVAL;
1296 goto out;
1297 }
1298 if (intarg == 0) {
1299 btrfs_info(info,
1300 "using default commit interval %us",
1301 BTRFS_DEFAULT_COMMIT_INTERVAL);
1302 intarg = BTRFS_DEFAULT_COMMIT_INTERVAL;
1303 } else if (intarg > 300) {
1304 btrfs_warn(info, "excessive commit interval %d",
1305 intarg);
1306 }
1307 info->commit_interval = intarg;
1308 break;
1309 case Opt_rescue:
1310 ret = parse_rescue_options(info, args[0].from);
1311 if (ret < 0) {
1312 btrfs_err(info, "unrecognized rescue value %s",
1313 args[0].from);
1314 goto out;
1315 }
1316 break;
1317 #ifdef CONFIG_BTRFS_DEBUG
1318 case Opt_fragment_all:
1319 btrfs_info(info, "fragmenting all space");
1320 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
1321 btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
1322 break;
1323 case Opt_fragment_metadata:
1324 btrfs_info(info, "fragmenting metadata");
1325 btrfs_set_opt(info->mount_opt,
1326 FRAGMENT_METADATA);
1327 break;
1328 case Opt_fragment_data:
1329 btrfs_info(info, "fragmenting data");
1330 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
1331 break;
1332 #endif
1333 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
1334 case Opt_ref_verify:
1335 btrfs_info(info, "doing ref verification");
1336 btrfs_set_opt(info->mount_opt, REF_VERIFY);
1337 break;
1338 #endif
1339 case Opt_err:
1340 btrfs_err(info, "unrecognized mount option '%s'", p);
1341 ret = -EINVAL;
1342 goto out;
1343 default:
1344 break;
1345 }
1346 }
1347 out:
1348 if (!ret && !check_options(info, &info->mount_opt, new_flags))
1349 ret = -EINVAL;
1350 return ret;
1351 }
1352
1353 /*
1354 * Parse mount options that are required early in the mount process.
1355 *
1356 * All other options will be parsed on much later in the mount process and
1357 * only when we need to allocate a new super block.
1358 */
1359 static int btrfs_parse_device_options(const char *options, blk_mode_t flags)
1360 {
1361 substring_t args[MAX_OPT_ARGS];
1362 char *device_name, *opts, *orig, *p;
1363 struct btrfs_device *device = NULL;
1364 int error = 0;
1365
1366 lockdep_assert_held(&uuid_mutex);
1367
1368 if (!options)
1369 return 0;
1370
1371 /*
1372 * strsep changes the string, duplicate it because btrfs_parse_options
1373 * gets called later
1374 */
1375 opts = kstrdup(options, GFP_KERNEL);
1376 if (!opts)
1377 return -ENOMEM;
1378 orig = opts;
1379
1380 while ((p = strsep(&opts, ",")) != NULL) {
1381 int token;
1382
1383 if (!*p)
1384 continue;
1385
1386 token = match_token(p, tokens, args);
1387 if (token == Opt_device) {
1388 device_name = match_strdup(&args[0]);
1389 if (!device_name) {
1390 error = -ENOMEM;
1391 goto out;
1392 }
1393 device = btrfs_scan_one_device(device_name, flags, false);
1394 kfree(device_name);
1395 if (IS_ERR(device)) {
1396 error = PTR_ERR(device);
1397 goto out;
1398 }
1399 }
1400 }
1401
1402 out:
1403 kfree(orig);
1404 return error;
1405 }
1406
1407 /*
1408 * Parse mount options that are related to subvolume id
1409 *
1410 * The value is later passed to mount_subvol()
1411 */
1412 static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
1413 u64 *subvol_objectid)
1414 {
1415 substring_t args[MAX_OPT_ARGS];
1416 char *opts, *orig, *p;
1417 int error = 0;
1418 u64 subvolid;
1419
1420 if (!options)
1421 return 0;
1422
1423 /*
1424 * strsep changes the string, duplicate it because
1425 * btrfs_parse_device_options gets called later
1426 */
1427 opts = kstrdup(options, GFP_KERNEL);
1428 if (!opts)
1429 return -ENOMEM;
1430 orig = opts;
1431
1432 while ((p = strsep(&opts, ",")) != NULL) {
1433 int token;
1434 if (!*p)
1435 continue;
1436
1437 token = match_token(p, tokens, args);
1438 switch (token) {
1439 case Opt_subvol:
1440 kfree(*subvol_name);
1441 *subvol_name = match_strdup(&args[0]);
1442 if (!*subvol_name) {
1443 error = -ENOMEM;
1444 goto out;
1445 }
1446 break;
1447 case Opt_subvolid:
1448 error = match_u64(&args[0], &subvolid);
1449 if (error)
1450 goto out;
1451
1452 /* we want the original fs_tree */
1453 if (subvolid == 0)
1454 subvolid = BTRFS_FS_TREE_OBJECTID;
1455
1456 *subvol_objectid = subvolid;
1457 break;
1458 default:
1459 break;
1460 }
1461 }
1462
1463 out:
1464 kfree(orig);
1465 return error;
1466 }
1467
1468 char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
1469 u64 subvol_objectid)
1470 {
1471 struct btrfs_root *root = fs_info->tree_root;
1472 struct btrfs_root *fs_root = NULL;
1473 struct btrfs_root_ref *root_ref;
1474 struct btrfs_inode_ref *inode_ref;
1475 struct btrfs_key key;
1476 struct btrfs_path *path = NULL;
1477 char *name = NULL, *ptr;
1478 u64 dirid;
1479 int len;
1480 int ret;
1481
1482 path = btrfs_alloc_path();
1483 if (!path) {
1484 ret = -ENOMEM;
1485 goto err;
1486 }
1487
1488 name = kmalloc(PATH_MAX, GFP_KERNEL);
1489 if (!name) {
1490 ret = -ENOMEM;
1491 goto err;
1492 }
1493 ptr = name + PATH_MAX - 1;
1494 ptr[0] = '\0';
1495
1496 /*
1497 * Walk up the subvolume trees in the tree of tree roots by root
1498 * backrefs until we hit the top-level subvolume.
1499 */
1500 while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
1501 key.objectid = subvol_objectid;
1502 key.type = BTRFS_ROOT_BACKREF_KEY;
1503 key.offset = (u64)-1;
1504
1505 ret = btrfs_search_backwards(root, &key, path);
1506 if (ret < 0) {
1507 goto err;
1508 } else if (ret > 0) {
1509 ret = -ENOENT;
1510 goto err;
1511 }
1512
1513 subvol_objectid = key.offset;
1514
1515 root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1516 struct btrfs_root_ref);
1517 len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
1518 ptr -= len + 1;
1519 if (ptr < name) {
1520 ret = -ENAMETOOLONG;
1521 goto err;
1522 }
1523 read_extent_buffer(path->nodes[0], ptr + 1,
1524 (unsigned long)(root_ref + 1), len);
1525 ptr[0] = '/';
1526 dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
1527 btrfs_release_path(path);
1528
1529 fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true);
1530 if (IS_ERR(fs_root)) {
1531 ret = PTR_ERR(fs_root);
1532 fs_root = NULL;
1533 goto err;
1534 }
1535
1536 /*
1537 * Walk up the filesystem tree by inode refs until we hit the
1538 * root directory.
1539 */
1540 while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
1541 key.objectid = dirid;
1542 key.type = BTRFS_INODE_REF_KEY;
1543 key.offset = (u64)-1;
1544
1545 ret = btrfs_search_backwards(fs_root, &key, path);
1546 if (ret < 0) {
1547 goto err;
1548 } else if (ret > 0) {
1549 ret = -ENOENT;
1550 goto err;
1551 }
1552
1553 dirid = key.offset;
1554
1555 inode_ref = btrfs_item_ptr(path->nodes[0],
1556 path->slots[0],
1557 struct btrfs_inode_ref);
1558 len = btrfs_inode_ref_name_len(path->nodes[0],
1559 inode_ref);
1560 ptr -= len + 1;
1561 if (ptr < name) {
1562 ret = -ENAMETOOLONG;
1563 goto err;
1564 }
1565 read_extent_buffer(path->nodes[0], ptr + 1,
1566 (unsigned long)(inode_ref + 1), len);
1567 ptr[0] = '/';
1568 btrfs_release_path(path);
1569 }
1570 btrfs_put_root(fs_root);
1571 fs_root = NULL;
1572 }
1573
1574 btrfs_free_path(path);
1575 if (ptr == name + PATH_MAX - 1) {
1576 name[0] = '/';
1577 name[1] = '\0';
1578 } else {
1579 memmove(name, ptr, name + PATH_MAX - ptr);
1580 }
1581 return name;
1582
1583 err:
1584 btrfs_put_root(fs_root);
1585 btrfs_free_path(path);
1586 kfree(name);
1587 return ERR_PTR(ret);
1588 }
1589
1590 static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
1591 {
1592 struct btrfs_root *root = fs_info->tree_root;
1593 struct btrfs_dir_item *di;
1594 struct btrfs_path *path;
1595 struct btrfs_key location;
1596 struct fscrypt_str name = FSTR_INIT("default", 7);
1597 u64 dir_id;
1598
1599 path = btrfs_alloc_path();
1600 if (!path)
1601 return -ENOMEM;
1602
1603 /*
1604 * Find the "default" dir item which points to the root item that we
1605 * will mount by default if we haven't been given a specific subvolume
1606 * to mount.
1607 */
1608 dir_id = btrfs_super_root_dir(fs_info->super_copy);
1609 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0);
1610 if (IS_ERR(di)) {
1611 btrfs_free_path(path);
1612 return PTR_ERR(di);
1613 }
1614 if (!di) {
1615 /*
1616 * Ok the default dir item isn't there. This is weird since
1617 * it's always been there, but don't freak out, just try and
1618 * mount the top-level subvolume.
1619 */
1620 btrfs_free_path(path);
1621 *objectid = BTRFS_FS_TREE_OBJECTID;
1622 return 0;
1623 }
1624
1625 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
1626 btrfs_free_path(path);
1627 *objectid = location.objectid;
1628 return 0;
1629 }
1630
1631 static int btrfs_fill_super(struct super_block *sb,
1632 struct btrfs_fs_devices *fs_devices,
1633 void *data)
1634 {
1635 struct inode *inode;
1636 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1637 int err;
1638
1639 sb->s_maxbytes = MAX_LFS_FILESIZE;
1640 sb->s_magic = BTRFS_SUPER_MAGIC;
1641 sb->s_op = &btrfs_super_ops;
1642 sb->s_d_op = &btrfs_dentry_operations;
1643 sb->s_export_op = &btrfs_export_ops;
1644 #ifdef CONFIG_FS_VERITY
1645 sb->s_vop = &btrfs_verityops;
1646 #endif
1647 sb->s_xattr = btrfs_xattr_handlers;
1648 sb->s_time_gran = 1;
1649 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
1650 sb->s_flags |= SB_POSIXACL;
1651 #endif
1652 sb->s_flags |= SB_I_VERSION;
1653 sb->s_iflags |= SB_I_CGROUPWB;
1654
1655 err = super_setup_bdi(sb);
1656 if (err) {
1657 btrfs_err(fs_info, "super_setup_bdi failed");
1658 return err;
1659 }
1660
1661 err = open_ctree(sb, fs_devices, (char *)data);
1662 if (err) {
1663 btrfs_err(fs_info, "open_ctree failed");
1664 return err;
1665 }
1666
1667 inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
1668 if (IS_ERR(inode)) {
1669 err = PTR_ERR(inode);
1670 btrfs_handle_fs_error(fs_info, err, NULL);
1671 goto fail_close;
1672 }
1673
1674 sb->s_root = d_make_root(inode);
1675 if (!sb->s_root) {
1676 err = -ENOMEM;
1677 goto fail_close;
1678 }
1679
1680 sb->s_flags |= SB_ACTIVE;
1681 return 0;
1682
1683 fail_close:
1684 close_ctree(fs_info);
1685 return err;
1686 }
1687
1688 int btrfs_sync_fs(struct super_block *sb, int wait)
1689 {
1690 struct btrfs_trans_handle *trans;
1691 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1692 struct btrfs_root *root = fs_info->tree_root;
1693
1694 trace_btrfs_sync_fs(fs_info, wait);
1695
1696 if (!wait) {
1697 filemap_flush(fs_info->btree_inode->i_mapping);
1698 return 0;
1699 }
1700
1701 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
1702
1703 trans = btrfs_attach_transaction_barrier(root);
1704 if (IS_ERR(trans)) {
1705 /* no transaction, don't bother */
1706 if (PTR_ERR(trans) == -ENOENT) {
1707 /*
1708 * Exit unless we have some pending changes
1709 * that need to go through commit
1710 */
1711 if (!test_bit(BTRFS_FS_NEED_TRANS_COMMIT,
1712 &fs_info->flags))
1713 return 0;
1714 /*
1715 * A non-blocking test if the fs is frozen. We must not
1716 * start a new transaction here otherwise a deadlock
1717 * happens. The pending operations are delayed to the
1718 * next commit after thawing.
1719 */
1720 if (sb_start_write_trylock(sb))
1721 sb_end_write(sb);
1722 else
1723 return 0;
1724 trans = btrfs_start_transaction(root, 0);
1725 }
1726 if (IS_ERR(trans))
1727 return PTR_ERR(trans);
1728 }
1729 return btrfs_commit_transaction(trans);
1730 }
1731
1732 static void print_rescue_option(struct seq_file *seq, const char *s, bool *printed)
1733 {
1734 seq_printf(seq, "%s%s", (*printed) ? ":" : ",rescue=", s);
1735 *printed = true;
1736 }
1737
1738 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
1739 {
1740 struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
1741 const char *compress_type;
1742 const char *subvol_name;
1743 bool printed = false;
1744
1745 if (btrfs_test_opt(info, DEGRADED))
1746 seq_puts(seq, ",degraded");
1747 if (btrfs_test_opt(info, NODATASUM))
1748 seq_puts(seq, ",nodatasum");
1749 if (btrfs_test_opt(info, NODATACOW))
1750 seq_puts(seq, ",nodatacow");
1751 if (btrfs_test_opt(info, NOBARRIER))
1752 seq_puts(seq, ",nobarrier");
1753 if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
1754 seq_printf(seq, ",max_inline=%llu", info->max_inline);
1755 if (info->thread_pool_size != min_t(unsigned long,
1756 num_online_cpus() + 2, 8))
1757 seq_printf(seq, ",thread_pool=%u", info->thread_pool_size);
1758 if (btrfs_test_opt(info, COMPRESS)) {
1759 compress_type = btrfs_compress_type2str(info->compress_type);
1760 if (btrfs_test_opt(info, FORCE_COMPRESS))
1761 seq_printf(seq, ",compress-force=%s", compress_type);
1762 else
1763 seq_printf(seq, ",compress=%s", compress_type);
1764 if (info->compress_level)
1765 seq_printf(seq, ":%d", info->compress_level);
1766 }
1767 if (btrfs_test_opt(info, NOSSD))
1768 seq_puts(seq, ",nossd");
1769 if (btrfs_test_opt(info, SSD_SPREAD))
1770 seq_puts(seq, ",ssd_spread");
1771 else if (btrfs_test_opt(info, SSD))
1772 seq_puts(seq, ",ssd");
1773 if (btrfs_test_opt(info, NOTREELOG))
1774 seq_puts(seq, ",notreelog");
1775 if (btrfs_test_opt(info, NOLOGREPLAY))
1776 print_rescue_option(seq, "nologreplay", &printed);
1777 if (btrfs_test_opt(info, USEBACKUPROOT))
1778 print_rescue_option(seq, "usebackuproot", &printed);
1779 if (btrfs_test_opt(info, IGNOREBADROOTS))
1780 print_rescue_option(seq, "ignorebadroots", &printed);
1781 if (btrfs_test_opt(info, IGNOREDATACSUMS))
1782 print_rescue_option(seq, "ignoredatacsums", &printed);
1783 if (btrfs_test_opt(info, FLUSHONCOMMIT))
1784 seq_puts(seq, ",flushoncommit");
1785 if (btrfs_test_opt(info, DISCARD_SYNC))
1786 seq_puts(seq, ",discard");
1787 if (btrfs_test_opt(info, DISCARD_ASYNC))
1788 seq_puts(seq, ",discard=async");
1789 if (!(info->sb->s_flags & SB_POSIXACL))
1790 seq_puts(seq, ",noacl");
1791 if (btrfs_free_space_cache_v1_active(info))
1792 seq_puts(seq, ",space_cache");
1793 else if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
1794 seq_puts(seq, ",space_cache=v2");
1795 else
1796 seq_puts(seq, ",nospace_cache");
1797 if (btrfs_test_opt(info, RESCAN_UUID_TREE))
1798 seq_puts(seq, ",rescan_uuid_tree");
1799 if (btrfs_test_opt(info, CLEAR_CACHE))
1800 seq_puts(seq, ",clear_cache");
1801 if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
1802 seq_puts(seq, ",user_subvol_rm_allowed");
1803 if (btrfs_test_opt(info, ENOSPC_DEBUG))
1804 seq_puts(seq, ",enospc_debug");
1805 if (btrfs_test_opt(info, AUTO_DEFRAG))
1806 seq_puts(seq, ",autodefrag");
1807 if (btrfs_test_opt(info, SKIP_BALANCE))
1808 seq_puts(seq, ",skip_balance");
1809 if (info->metadata_ratio)
1810 seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio);
1811 if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
1812 seq_puts(seq, ",fatal_errors=panic");
1813 if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1814 seq_printf(seq, ",commit=%u", info->commit_interval);
1815 #ifdef CONFIG_BTRFS_DEBUG
1816 if (btrfs_test_opt(info, FRAGMENT_DATA))
1817 seq_puts(seq, ",fragment=data");
1818 if (btrfs_test_opt(info, FRAGMENT_METADATA))
1819 seq_puts(seq, ",fragment=metadata");
1820 #endif
1821 if (btrfs_test_opt(info, REF_VERIFY))
1822 seq_puts(seq, ",ref_verify");
1823 seq_printf(seq, ",subvolid=%llu",
1824 BTRFS_I(d_inode(dentry))->root->root_key.objectid);
1825 subvol_name = btrfs_get_subvol_name_from_objectid(info,
1826 BTRFS_I(d_inode(dentry))->root->root_key.objectid);
1827 if (!IS_ERR(subvol_name)) {
1828 seq_puts(seq, ",subvol=");
1829 seq_escape(seq, subvol_name, " \t\n\\");
1830 kfree(subvol_name);
1831 }
1832 return 0;
1833 }
1834
1835 static int btrfs_test_super(struct super_block *s, void *data)
1836 {
1837 struct btrfs_fs_info *p = data;
1838 struct btrfs_fs_info *fs_info = btrfs_sb(s);
1839
1840 return fs_info->fs_devices == p->fs_devices;
1841 }
1842
1843 static int btrfs_set_super(struct super_block *s, void *data)
1844 {
1845 int err = set_anon_super(s, data);
1846 if (!err)
1847 s->s_fs_info = data;
1848 return err;
1849 }
1850
1851 /*
1852 * subvolumes are identified by ino 256
1853 */
1854 static inline int is_subvolume_inode(struct inode *inode)
1855 {
1856 if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
1857 return 1;
1858 return 0;
1859 }
1860
1861 static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
1862 struct vfsmount *mnt)
1863 {
1864 struct dentry *root;
1865 int ret;
1866
1867 if (!subvol_name) {
1868 if (!subvol_objectid) {
1869 ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
1870 &subvol_objectid);
1871 if (ret) {
1872 root = ERR_PTR(ret);
1873 goto out;
1874 }
1875 }
1876 subvol_name = btrfs_get_subvol_name_from_objectid(
1877 btrfs_sb(mnt->mnt_sb), subvol_objectid);
1878 if (IS_ERR(subvol_name)) {
1879 root = ERR_CAST(subvol_name);
1880 subvol_name = NULL;
1881 goto out;
1882 }
1883
1884 }
1885
1886 root = mount_subtree(mnt, subvol_name);
1887 /* mount_subtree() drops our reference on the vfsmount. */
1888 mnt = NULL;
1889
1890 if (!IS_ERR(root)) {
1891 struct super_block *s = root->d_sb;
1892 struct btrfs_fs_info *fs_info = btrfs_sb(s);
1893 struct inode *root_inode = d_inode(root);
1894 u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;
1895
1896 ret = 0;
1897 if (!is_subvolume_inode(root_inode)) {
1898 btrfs_err(fs_info, "'%s' is not a valid subvolume",
1899 subvol_name);
1900 ret = -EINVAL;
1901 }
1902 if (subvol_objectid && root_objectid != subvol_objectid) {
1903 /*
1904 * This will also catch a race condition where a
1905 * subvolume which was passed by ID is renamed and
1906 * another subvolume is renamed over the old location.
1907 */
1908 btrfs_err(fs_info,
1909 "subvol '%s' does not match subvolid %llu",
1910 subvol_name, subvol_objectid);
1911 ret = -EINVAL;
1912 }
1913 if (ret) {
1914 dput(root);
1915 root = ERR_PTR(ret);
1916 deactivate_locked_super(s);
1917 }
1918 }
1919
1920 out:
1921 mntput(mnt);
1922 kfree(subvol_name);
1923 return root;
1924 }
1925
1926 /*
1927 * Find a superblock for the given device / mount point.
1928 *
1929 * Note: This is based on mount_bdev from fs/super.c with a few additions
1930 * for multiple device setup. Make sure to keep it in sync.
1931 */
1932 static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
1933 int flags, const char *device_name, void *data)
1934 {
1935 struct block_device *bdev = NULL;
1936 struct super_block *s;
1937 struct btrfs_device *device = NULL;
1938 struct btrfs_fs_devices *fs_devices = NULL;
1939 struct btrfs_fs_info *fs_info = NULL;
1940 void *new_sec_opts = NULL;
1941 blk_mode_t mode = sb_open_mode(flags);
1942 int error = 0;
1943
1944 if (data) {
1945 error = security_sb_eat_lsm_opts(data, &new_sec_opts);
1946 if (error)
1947 return ERR_PTR(error);
1948 }
1949
1950 /*
1951 * Setup a dummy root and fs_info for test/set super. This is because
1952 * we don't actually fill this stuff out until open_ctree, but we need
1953 * then open_ctree will properly initialize the file system specific
1954 * settings later. btrfs_init_fs_info initializes the static elements
1955 * of the fs_info (locks and such) to make cleanup easier if we find a
1956 * superblock with our given fs_devices later on at sget() time.
1957 */
1958 fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
1959 if (!fs_info) {
1960 error = -ENOMEM;
1961 goto error_sec_opts;
1962 }
1963 btrfs_init_fs_info(fs_info);
1964
1965 fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
1966 fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
1967 if (!fs_info->super_copy || !fs_info->super_for_commit) {
1968 error = -ENOMEM;
1969 goto error_fs_info;
1970 }
1971
1972 mutex_lock(&uuid_mutex);
1973 error = btrfs_parse_device_options(data, mode);
1974 if (error) {
1975 mutex_unlock(&uuid_mutex);
1976 goto error_fs_info;
1977 }
1978
1979 /*
1980 * With 'true' passed to btrfs_scan_one_device() (mount time) we expect
1981 * either a valid device or an error.
1982 */
1983 device = btrfs_scan_one_device(device_name, mode, true);
1984 ASSERT(device != NULL);
1985 if (IS_ERR(device)) {
1986 mutex_unlock(&uuid_mutex);
1987 error = PTR_ERR(device);
1988 goto error_fs_info;
1989 }
1990
1991 fs_devices = device->fs_devices;
1992 fs_info->fs_devices = fs_devices;
1993
1994 error = btrfs_open_devices(fs_devices, mode, fs_type);
1995 mutex_unlock(&uuid_mutex);
1996 if (error)
1997 goto error_fs_info;
1998
1999 if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
2000 error = -EACCES;
2001 goto error_close_devices;
2002 }
2003
2004 bdev = fs_devices->latest_dev->bdev;
2005 s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
2006 fs_info);
2007 if (IS_ERR(s)) {
2008 error = PTR_ERR(s);
2009 goto error_close_devices;
2010 }
2011
2012 if (s->s_root) {
2013 btrfs_close_devices(fs_devices);
2014 btrfs_free_fs_info(fs_info);
2015 if ((flags ^ s->s_flags) & SB_RDONLY)
2016 error = -EBUSY;
2017 } else {
2018 snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
2019 shrinker_debugfs_rename(s->s_shrink, "sb-%s:%s", fs_type->name,
2020 s->s_id);
2021 btrfs_sb(s)->bdev_holder = fs_type;
2022 error = btrfs_fill_super(s, fs_devices, data);
2023 }
2024 if (!error)
2025 error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL);
2026 security_free_mnt_opts(&new_sec_opts);
2027 if (error) {
2028 deactivate_locked_super(s);
2029 return ERR_PTR(error);
2030 }
2031
2032 return dget(s->s_root);
2033
2034 error_close_devices:
2035 btrfs_close_devices(fs_devices);
2036 error_fs_info:
2037 btrfs_free_fs_info(fs_info);
2038 error_sec_opts:
2039 security_free_mnt_opts(&new_sec_opts);
2040 return ERR_PTR(error);
2041 }
2042
2043 /*
2044 * Mount function which is called by VFS layer.
2045 *
2046 * In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
2047 * which needs vfsmount* of device's root (/). This means device's root has to
2048 * be mounted internally in any case.
2049 *
2050 * Operation flow:
2051 * 1. Parse subvol id related options for later use in mount_subvol().
2052 *
2053 * 2. Mount device's root (/) by calling vfs_kern_mount().
2054 *
2055 * NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
2056 * first place. In order to avoid calling btrfs_mount() again, we use
2057 * different file_system_type which is not registered to VFS by
2058 * register_filesystem() (btrfs_root_fs_type). As a result,
2059 * btrfs_mount_root() is called. The return value will be used by
2060 * mount_subtree() in mount_subvol().
2061 *
2062 * 3. Call mount_subvol() to get the dentry of subvolume. Since there is
2063 * "btrfs subvolume set-default", mount_subvol() is called always.
2064 */
2065 static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
2066 const char *device_name, void *data)
2067 {
2068 struct vfsmount *mnt_root;
2069 struct dentry *root;
2070 char *subvol_name = NULL;
2071 u64 subvol_objectid = 0;
2072 int error = 0;
2073
2074 error = btrfs_parse_subvol_options(data, &subvol_name,
2075 &subvol_objectid);
2076 if (error) {
2077 kfree(subvol_name);
2078 return ERR_PTR(error);
2079 }
2080
2081 /* mount device's root (/) */
2082 mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
2083 if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
2084 if (flags & SB_RDONLY) {
2085 mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
2086 flags & ~SB_RDONLY, device_name, data);
2087 } else {
2088 mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
2089 flags | SB_RDONLY, device_name, data);
2090 if (IS_ERR(mnt_root)) {
2091 root = ERR_CAST(mnt_root);
2092 kfree(subvol_name);
2093 goto out;
2094 }
2095
2096 down_write(&mnt_root->mnt_sb->s_umount);
2097 error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
2098 up_write(&mnt_root->mnt_sb->s_umount);
2099 if (error < 0) {
2100 root = ERR_PTR(error);
2101 mntput(mnt_root);
2102 kfree(subvol_name);
2103 goto out;
2104 }
2105 }
2106 }
2107 if (IS_ERR(mnt_root)) {
2108 root = ERR_CAST(mnt_root);
2109 kfree(subvol_name);
2110 goto out;
2111 }
2112
2113 /* mount_subvol() will free subvol_name and mnt_root */
2114 root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
2115
2116 out:
2117 return root;
2118 }
2119
2120 static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
2121 u32 new_pool_size, u32 old_pool_size)
2122 {
2123 if (new_pool_size == old_pool_size)
2124 return;
2125
2126 fs_info->thread_pool_size = new_pool_size;
2127
2128 btrfs_info(fs_info, "resize thread pool %d -> %d",
2129 old_pool_size, new_pool_size);
2130
2131 btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
2132 btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
2133 btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
2134 workqueue_set_max_active(fs_info->endio_workers, new_pool_size);
2135 workqueue_set_max_active(fs_info->endio_meta_workers, new_pool_size);
2136 btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
2137 btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
2138 btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
2139 }
2140
2141 static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
2142 unsigned long old_opts, int flags)
2143 {
2144 if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
2145 (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
2146 (flags & SB_RDONLY))) {
2147 /* wait for any defraggers to finish */
2148 wait_event(fs_info->transaction_wait,
2149 (atomic_read(&fs_info->defrag_running) == 0));
2150 if (flags & SB_RDONLY)
2151 sync_filesystem(fs_info->sb);
2152 }
2153 }
2154
2155 static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
2156 unsigned long old_opts)
2157 {
2158 const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
2159
2160 /*
2161 * We need to cleanup all defragable inodes if the autodefragment is
2162 * close or the filesystem is read only.
2163 */
2164 if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
2165 (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) {
2166 btrfs_cleanup_defrag_inodes(fs_info);
2167 }
2168
2169 /* If we toggled discard async */
2170 if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
2171 btrfs_test_opt(fs_info, DISCARD_ASYNC))
2172 btrfs_discard_resume(fs_info);
2173 else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
2174 !btrfs_test_opt(fs_info, DISCARD_ASYNC))
2175 btrfs_discard_cleanup(fs_info);
2176
2177 /* If we toggled space cache */
2178 if (cache_opt != btrfs_free_space_cache_v1_active(fs_info))
2179 btrfs_set_free_space_cache_v1_active(fs_info, cache_opt);
2180 }
2181
2182 static int btrfs_remount_rw(struct btrfs_fs_info *fs_info)
2183 {
2184 int ret;
2185
2186 if (BTRFS_FS_ERROR(fs_info)) {
2187 btrfs_err(fs_info,
2188 "remounting read-write after error is not allowed");
2189 return -EINVAL;
2190 }
2191
2192 if (fs_info->fs_devices->rw_devices == 0)
2193 return -EACCES;
2194
2195 if (!btrfs_check_rw_degradable(fs_info, NULL)) {
2196 btrfs_warn(fs_info,
2197 "too many missing devices, writable remount is not allowed");
2198 return -EACCES;
2199 }
2200
2201 if (btrfs_super_log_root(fs_info->super_copy) != 0) {
2202 btrfs_warn(fs_info,
2203 "mount required to replay tree-log, cannot remount read-write");
2204 return -EINVAL;
2205 }
2206
2207 /*
2208 * NOTE: when remounting with a change that does writes, don't put it
2209 * anywhere above this point, as we are not sure to be safe to write
2210 * until we pass the above checks.
2211 */
2212 ret = btrfs_start_pre_rw_mount(fs_info);
2213 if (ret)
2214 return ret;
2215
2216 btrfs_clear_sb_rdonly(fs_info->sb);
2217
2218 set_bit(BTRFS_FS_OPEN, &fs_info->flags);
2219
2220 /*
2221 * If we've gone from readonly -> read-write, we need to get our
2222 * sync/async discard lists in the right state.
2223 */
2224 btrfs_discard_resume(fs_info);
2225
2226 return 0;
2227 }
2228
2229 static int btrfs_remount_ro(struct btrfs_fs_info *fs_info)
2230 {
2231 /*
2232 * This also happens on 'umount -rf' or on shutdown, when the
2233 * filesystem is busy.
2234 */
2235 cancel_work_sync(&fs_info->async_reclaim_work);
2236 cancel_work_sync(&fs_info->async_data_reclaim_work);
2237
2238 btrfs_discard_cleanup(fs_info);
2239
2240 /* Wait for the uuid_scan task to finish */
2241 down(&fs_info->uuid_tree_rescan_sem);
2242 /* Avoid complains from lockdep et al. */
2243 up(&fs_info->uuid_tree_rescan_sem);
2244
2245 btrfs_set_sb_rdonly(fs_info->sb);
2246
2247 /*
2248 * Setting SB_RDONLY will put the cleaner thread to sleep at the next
2249 * loop if it's already active. If it's already asleep, we'll leave
2250 * unused block groups on disk until we're mounted read-write again
2251 * unless we clean them up here.
2252 */
2253 btrfs_delete_unused_bgs(fs_info);
2254
2255 /*
2256 * The cleaner task could be already running before we set the flag
2257 * BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock). We must make
2258 * sure that after we finish the remount, i.e. after we call
2259 * btrfs_commit_super(), the cleaner can no longer start a transaction
2260 * - either because it was dropping a dead root, running delayed iputs
2261 * or deleting an unused block group (the cleaner picked a block
2262 * group from the list of unused block groups before we were able to
2263 * in the previous call to btrfs_delete_unused_bgs()).
2264 */
2265 wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING, TASK_UNINTERRUPTIBLE);
2266
2267 /*
2268 * We've set the superblock to RO mode, so we might have made the
2269 * cleaner task sleep without running all pending delayed iputs. Go
2270 * through all the delayed iputs here, so that if an unmount happens
2271 * without remounting RW we don't end up at finishing close_ctree()
2272 * with a non-empty list of delayed iputs.
2273 */
2274 btrfs_run_delayed_iputs(fs_info);
2275
2276 btrfs_dev_replace_suspend_for_unmount(fs_info);
2277 btrfs_scrub_cancel(fs_info);
2278 btrfs_pause_balance(fs_info);
2279
2280 /*
2281 * Pause the qgroup rescan worker if it is running. We don't want it to
2282 * be still running after we are in RO mode, as after that, by the time
2283 * we unmount, it might have left a transaction open, so we would leak
2284 * the transaction and/or crash.
2285 */
2286 btrfs_qgroup_wait_for_completion(fs_info, false);
2287
2288 return btrfs_commit_super(fs_info);
2289 }
2290
2291 static int btrfs_remount(struct super_block *sb, int *flags, char *data)
2292 {
2293 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2294 unsigned old_flags = sb->s_flags;
2295 unsigned long old_opts = fs_info->mount_opt;
2296 unsigned long old_compress_type = fs_info->compress_type;
2297 u64 old_max_inline = fs_info->max_inline;
2298 u32 old_thread_pool_size = fs_info->thread_pool_size;
2299 u32 old_metadata_ratio = fs_info->metadata_ratio;
2300 int ret;
2301
2302 sync_filesystem(sb);
2303 set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
2304
2305 if (data) {
2306 void *new_sec_opts = NULL;
2307
2308 ret = security_sb_eat_lsm_opts(data, &new_sec_opts);
2309 if (!ret)
2310 ret = security_sb_remount(sb, new_sec_opts);
2311 security_free_mnt_opts(&new_sec_opts);
2312 if (ret)
2313 goto restore;
2314 }
2315
2316 ret = btrfs_parse_options(fs_info, data, *flags);
2317 if (ret)
2318 goto restore;
2319
2320 ret = btrfs_check_features(fs_info, !(*flags & SB_RDONLY));
2321 if (ret < 0)
2322 goto restore;
2323
2324 btrfs_remount_begin(fs_info, old_opts, *flags);
2325 btrfs_resize_thread_pool(fs_info,
2326 fs_info->thread_pool_size, old_thread_pool_size);
2327
2328 if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
2329 (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
2330 (!sb_rdonly(sb) || (*flags & SB_RDONLY))) {
2331 btrfs_warn(fs_info,
2332 "remount supports changing free space tree only from ro to rw");
2333 /* Make sure free space cache options match the state on disk */
2334 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
2335 btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE);
2336 btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
2337 }
2338 if (btrfs_free_space_cache_v1_active(fs_info)) {
2339 btrfs_clear_opt(fs_info->mount_opt, FREE_SPACE_TREE);
2340 btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE);
2341 }
2342 }
2343
2344 ret = 0;
2345 if (!sb_rdonly(sb) && (*flags & SB_RDONLY))
2346 ret = btrfs_remount_ro(fs_info);
2347 else if (sb_rdonly(sb) && !(*flags & SB_RDONLY))
2348 ret = btrfs_remount_rw(fs_info);
2349 if (ret)
2350 goto restore;
2351
2352 /*
2353 * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS,
2354 * since the absence of the flag means it can be toggled off by remount.
2355 */
2356 *flags |= SB_I_VERSION;
2357
2358 wake_up_process(fs_info->transaction_kthread);
2359 btrfs_remount_cleanup(fs_info, old_opts);
2360 btrfs_clear_oneshot_options(fs_info);
2361 clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
2362
2363 return 0;
2364
2365 restore:
2366 /* We've hit an error - don't reset SB_RDONLY */
2367 if (sb_rdonly(sb))
2368 old_flags |= SB_RDONLY;
2369 if (!(old_flags & SB_RDONLY))
2370 clear_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
2371 sb->s_flags = old_flags;
2372 fs_info->mount_opt = old_opts;
2373 fs_info->compress_type = old_compress_type;
2374 fs_info->max_inline = old_max_inline;
2375 btrfs_resize_thread_pool(fs_info,
2376 old_thread_pool_size, fs_info->thread_pool_size);
2377 fs_info->metadata_ratio = old_metadata_ratio;
2378 btrfs_remount_cleanup(fs_info, old_opts);
2379 clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
2380
2381 return ret;
2382 }
2383
2384 static void btrfs_ctx_to_info(struct btrfs_fs_info *fs_info, struct btrfs_fs_context *ctx)
2385 {
2386 fs_info->max_inline = ctx->max_inline;
2387 fs_info->commit_interval = ctx->commit_interval;
2388 fs_info->metadata_ratio = ctx->metadata_ratio;
2389 fs_info->thread_pool_size = ctx->thread_pool_size;
2390 fs_info->mount_opt = ctx->mount_opt;
2391 fs_info->compress_type = ctx->compress_type;
2392 fs_info->compress_level = ctx->compress_level;
2393 }
2394
2395 static void btrfs_info_to_ctx(struct btrfs_fs_info *fs_info, struct btrfs_fs_context *ctx)
2396 {
2397 ctx->max_inline = fs_info->max_inline;
2398 ctx->commit_interval = fs_info->commit_interval;
2399 ctx->metadata_ratio = fs_info->metadata_ratio;
2400 ctx->thread_pool_size = fs_info->thread_pool_size;
2401 ctx->mount_opt = fs_info->mount_opt;
2402 ctx->compress_type = fs_info->compress_type;
2403 ctx->compress_level = fs_info->compress_level;
2404 }
2405
/*
 * Log @fmt at info level when mount option @opt is set in @fs_info->mount_opt
 * but was not set in @old_ctx->mount_opt.  A NULL @old_ctx counts as "not set".
 *
 * @fs_info and @old_ctx are parenthesized so that any pointer expression
 * (e.g. &ctx) can be passed safely.  @opt is token-pasted by
 * btrfs_raw_test_opt() and must be a bare option name.
 */
#define btrfs_info_if_set(fs_info, old_ctx, opt, fmt, args...)			\
do {										\
	if ((!(old_ctx) ||							\
	     !btrfs_raw_test_opt((old_ctx)->mount_opt, opt)) &&			\
	    btrfs_raw_test_opt((fs_info)->mount_opt, opt))			\
		btrfs_info(fs_info, fmt, ##args);				\
} while (0)
2412
/*
 * Log @fmt at info level when mount option @opt was set in
 * @old_ctx->mount_opt but is no longer set in @fs_info->mount_opt.  With a
 * NULL @old_ctx nothing is logged.
 *
 * @fs_info and @old_ctx are parenthesized so that any pointer expression
 * (e.g. &ctx) can be passed safely.  @opt is token-pasted by
 * btrfs_raw_test_opt() and must be a bare option name.
 */
#define btrfs_info_if_unset(fs_info, old_ctx, opt, fmt, args...)		\
do {										\
	if (((old_ctx) &&							\
	     btrfs_raw_test_opt((old_ctx)->mount_opt, opt)) &&			\
	    !btrfs_raw_test_opt((fs_info)->mount_opt, opt))			\
		btrfs_info(fs_info, fmt, ##args);				\
} while (0)
2419
2420 static void btrfs_emit_options(struct btrfs_fs_info *info,
2421 struct btrfs_fs_context *old)
2422 {
2423 btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum");
2424 btrfs_info_if_set(info, old, DEGRADED, "allowing degraded mounts");
2425 btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum");
2426 btrfs_info_if_set(info, old, SSD, "enabling ssd optimizations");
2427 btrfs_info_if_set(info, old, SSD_SPREAD, "using spread ssd allocation scheme");
2428 btrfs_info_if_set(info, old, NOBARRIER, "turning off barriers");
2429 btrfs_info_if_set(info, old, NOTREELOG, "disabling tree log");
2430 btrfs_info_if_set(info, old, NOLOGREPLAY, "disabling log replay at mount time");
2431 btrfs_info_if_set(info, old, FLUSHONCOMMIT, "turning on flush-on-commit");
2432 btrfs_info_if_set(info, old, DISCARD_SYNC, "turning on sync discard");
2433 btrfs_info_if_set(info, old, DISCARD_ASYNC, "turning on async discard");
2434 btrfs_info_if_set(info, old, FREE_SPACE_TREE, "enabling free space tree");
2435 btrfs_info_if_set(info, old, SPACE_CACHE, "enabling disk space caching");
2436 btrfs_info_if_set(info, old, CLEAR_CACHE, "force clearing of disk cache");
2437 btrfs_info_if_set(info, old, AUTO_DEFRAG, "enabling auto defrag");
2438 btrfs_info_if_set(info, old, FRAGMENT_DATA, "fragmenting data");
2439 btrfs_info_if_set(info, old, FRAGMENT_METADATA, "fragmenting metadata");
2440 btrfs_info_if_set(info, old, REF_VERIFY, "doing ref verification");
2441 btrfs_info_if_set(info, old, USEBACKUPROOT, "trying to use backup root at mount time");
2442 btrfs_info_if_set(info, old, IGNOREBADROOTS, "ignoring bad roots");
2443 btrfs_info_if_set(info, old, IGNOREDATACSUMS, "ignoring data csums");
2444
2445 btrfs_info_if_unset(info, old, NODATACOW, "setting datacow");
2446 btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations");
2447 btrfs_info_if_unset(info, old, SSD_SPREAD, "not using spread ssd allocation scheme");
2448 btrfs_info_if_unset(info, old, NOBARRIER, "turning off barriers");
2449 btrfs_info_if_unset(info, old, NOTREELOG, "enabling tree log");
2450 btrfs_info_if_unset(info, old, SPACE_CACHE, "disabling disk space caching");
2451 btrfs_info_if_unset(info, old, FREE_SPACE_TREE, "disabling free space tree");
2452 btrfs_info_if_unset(info, old, AUTO_DEFRAG, "disabling auto defrag");
2453 btrfs_info_if_unset(info, old, COMPRESS, "use no compression");
2454
2455 /* Did the compression settings change? */
2456 if (btrfs_test_opt(info, COMPRESS) &&
2457 (!old ||
2458 old->compress_type != info->compress_type ||
2459 old->compress_level != info->compress_level ||
2460 (!btrfs_raw_test_opt(old->mount_opt, FORCE_COMPRESS) &&
2461 btrfs_raw_test_opt(info->mount_opt, FORCE_COMPRESS)))) {
2462 const char *compress_type = btrfs_compress_type2str(info->compress_type);
2463
2464 btrfs_info(info, "%s %s compression, level %d",
2465 btrfs_test_opt(info, FORCE_COMPRESS) ? "force" : "use",
2466 compress_type, info->compress_level);
2467 }
2468
2469 if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
2470 btrfs_info(info, "max_inline set to %llu", info->max_inline);
2471 }
2472
2473 static int btrfs_reconfigure(struct fs_context *fc)
2474 {
2475 struct super_block *sb = fc->root->d_sb;
2476 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2477 struct btrfs_fs_context *ctx = fc->fs_private;
2478 struct btrfs_fs_context old_ctx;
2479 int ret = 0;
2480 bool mount_reconfigure = (fc->s_fs_info != NULL);
2481
2482 btrfs_info_to_ctx(fs_info, &old_ctx);
2483
2484 sync_filesystem(sb);
2485 set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
2486
2487 if (!mount_reconfigure &&
2488 !check_options(fs_info, &ctx->mount_opt, fc->sb_flags))
2489 return -EINVAL;
2490
2491 ret = btrfs_check_features(fs_info, !(fc->sb_flags & SB_RDONLY));
2492 if (ret < 0)
2493 return ret;
2494
2495 btrfs_ctx_to_info(fs_info, ctx);
2496 btrfs_remount_begin(fs_info, old_ctx.mount_opt, fc->sb_flags);
2497 btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size,
2498 old_ctx.thread_pool_size);
2499
2500 if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
2501 (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
2502 (!sb_rdonly(sb) || (fc->sb_flags & SB_RDONLY))) {
2503 btrfs_warn(fs_info,
2504 "remount supports changing free space tree only from RO to RW");
2505 /* Make sure free space cache options match the state on disk. */
2506 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
2507 btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE);
2508 btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
2509 }
2510 if (btrfs_free_space_cache_v1_active(fs_info)) {
2511 btrfs_clear_opt(fs_info->mount_opt, FREE_SPACE_TREE);
2512 btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE);
2513 }
2514 }
2515
2516 ret = 0;
2517 if (!sb_rdonly(sb) && (fc->sb_flags & SB_RDONLY))
2518 ret = btrfs_remount_ro(fs_info);
2519 else if (sb_rdonly(sb) && !(fc->sb_flags & SB_RDONLY))
2520 ret = btrfs_remount_rw(fs_info);
2521 if (ret)
2522 goto restore;
2523
2524 /*
2525 * If we set the mask during the parameter parsing VFS would reject the
2526 * remount. Here we can set the mask and the value will be updated
2527 * appropriately.
2528 */
2529 if ((fc->sb_flags & SB_POSIXACL) != (sb->s_flags & SB_POSIXACL))
2530 fc->sb_flags_mask |= SB_POSIXACL;
2531
2532 btrfs_emit_options(fs_info, &old_ctx);
2533 wake_up_process(fs_info->transaction_kthread);
2534 btrfs_remount_cleanup(fs_info, old_ctx.mount_opt);
2535 btrfs_clear_oneshot_options(fs_info);
2536 clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
2537
2538 return 0;
2539 restore:
2540 btrfs_ctx_to_info(fs_info, &old_ctx);
2541 btrfs_remount_cleanup(fs_info, old_ctx.mount_opt);
2542 clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
2543 return ret;
2544 }
2545
2546 /* Used to sort the devices by max_avail(descending sort) */
2547 static int btrfs_cmp_device_free_bytes(const void *a, const void *b)
2548 {
2549 const struct btrfs_device_info *dev_info1 = a;
2550 const struct btrfs_device_info *dev_info2 = b;
2551
2552 if (dev_info1->max_avail > dev_info2->max_avail)
2553 return -1;
2554 else if (dev_info1->max_avail < dev_info2->max_avail)
2555 return 1;
2556 return 0;
2557 }
2558
2559 /*
2560 * sort the devices by max_avail, in which max free extent size of each device
2561 * is stored.(Descending Sort)
2562 */
2563 static inline void btrfs_descending_sort_devices(
2564 struct btrfs_device_info *devices,
2565 size_t nr_devices)
2566 {
2567 sort(devices, nr_devices, sizeof(struct btrfs_device_info),
2568 btrfs_cmp_device_free_bytes, NULL);
2569 }
2570
2571 /*
2572 * The helper to calc the free space on the devices that can be used to store
2573 * file data.
2574 */
2575 static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
2576 u64 *free_bytes)
2577 {
2578 struct btrfs_device_info *devices_info;
2579 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2580 struct btrfs_device *device;
2581 u64 type;
2582 u64 avail_space;
2583 u64 min_stripe_size;
2584 int num_stripes = 1;
2585 int i = 0, nr_devices;
2586 const struct btrfs_raid_attr *rattr;
2587
2588 /*
2589 * We aren't under the device list lock, so this is racy-ish, but good
2590 * enough for our purposes.
2591 */
2592 nr_devices = fs_info->fs_devices->open_devices;
2593 if (!nr_devices) {
2594 smp_mb();
2595 nr_devices = fs_info->fs_devices->open_devices;
2596 ASSERT(nr_devices);
2597 if (!nr_devices) {
2598 *free_bytes = 0;
2599 return 0;
2600 }
2601 }
2602
2603 devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
2604 GFP_KERNEL);
2605 if (!devices_info)
2606 return -ENOMEM;
2607
2608 /* calc min stripe number for data space allocation */
2609 type = btrfs_data_alloc_profile(fs_info);
2610 rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];
2611
2612 if (type & BTRFS_BLOCK_GROUP_RAID0)
2613 num_stripes = nr_devices;
2614 else if (type & BTRFS_BLOCK_GROUP_RAID1_MASK)
2615 num_stripes = rattr->ncopies;
2616 else if (type & BTRFS_BLOCK_GROUP_RAID10)
2617 num_stripes = 4;
2618
2619 /* Adjust for more than 1 stripe per device */
2620 min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
2621
2622 rcu_read_lock();
2623 list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
2624 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
2625 &device->dev_state) ||
2626 !device->bdev ||
2627 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
2628 continue;
2629
2630 if (i >= nr_devices)
2631 break;
2632
2633 avail_space = device->total_bytes - device->bytes_used;
2634
2635 /* align with stripe_len */
2636 avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN);
2637
2638 /*
2639 * Ensure we have at least min_stripe_size on top of the
2640 * reserved space on the device.
2641 */
2642 if (avail_space <= BTRFS_DEVICE_RANGE_RESERVED + min_stripe_size)
2643 continue;
2644
2645 avail_space -= BTRFS_DEVICE_RANGE_RESERVED;
2646
2647 devices_info[i].dev = device;
2648 devices_info[i].max_avail = avail_space;
2649
2650 i++;
2651 }
2652 rcu_read_unlock();
2653
2654 nr_devices = i;
2655
2656 btrfs_descending_sort_devices(devices_info, nr_devices);
2657
2658 i = nr_devices - 1;
2659 avail_space = 0;
2660 while (nr_devices >= rattr->devs_min) {
2661 num_stripes = min(num_stripes, nr_devices);
2662
2663 if (devices_info[i].max_avail >= min_stripe_size) {
2664 int j;
2665 u64 alloc_size;
2666
2667 avail_space += devices_info[i].max_avail * num_stripes;
2668 alloc_size = devices_info[i].max_avail;
2669 for (j = i + 1 - num_stripes; j <= i; j++)
2670 devices_info[j].max_avail -= alloc_size;
2671 }
2672 i--;
2673 nr_devices--;
2674 }
2675
2676 kfree(devices_info);
2677 *free_bytes = avail_space;
2678 return 0;
2679 }
2680
2681 /*
2682 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
2683 *
2684 * If there's a redundant raid level at DATA block groups, use the respective
2685 * multiplier to scale the sizes.
2686 *
2687 * Unused device space usage is based on simulating the chunk allocator
2688 * algorithm that respects the device sizes and order of allocations. This is
2689 * a close approximation of the actual use but there are other factors that may
2690 * change the result (like a new metadata chunk).
2691 *
2692 * If metadata is exhausted, f_bavail will be 0.
2693 */
2694 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2695 {
2696 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
2697 struct btrfs_super_block *disk_super = fs_info->super_copy;
2698 struct btrfs_space_info *found;
2699 u64 total_used = 0;
2700 u64 total_free_data = 0;
2701 u64 total_free_meta = 0;
2702 u32 bits = fs_info->sectorsize_bits;
2703 __be32 *fsid = (__be32 *)fs_info->fs_devices->fsid;
2704 unsigned factor = 1;
2705 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
2706 int ret;
2707 u64 thresh = 0;
2708 int mixed = 0;
2709
2710 list_for_each_entry(found, &fs_info->space_info, list) {
2711 if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
2712 int i;
2713
2714 total_free_data += found->disk_total - found->disk_used;
2715 total_free_data -=
2716 btrfs_account_ro_block_groups_free_space(found);
2717
2718 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2719 if (!list_empty(&found->block_groups[i]))
2720 factor = btrfs_bg_type_to_factor(
2721 btrfs_raid_array[i].bg_flag);
2722 }
2723 }
2724
2725 /*
2726 * Metadata in mixed block group profiles are accounted in data
2727 */
2728 if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
2729 if (found->flags & BTRFS_BLOCK_GROUP_DATA)
2730 mixed = 1;
2731 else
2732 total_free_meta += found->disk_total -
2733 found->disk_used;
2734 }
2735
2736 total_used += found->disk_used;
2737 }
2738
2739 buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
2740 buf->f_blocks >>= bits;
2741 buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
2742
2743 /* Account global block reserve as used, it's in logical size already */
2744 spin_lock(&block_rsv->lock);
2745 /* Mixed block groups accounting is not byte-accurate, avoid overflow */
2746 if (buf->f_bfree >= block_rsv->size >> bits)
2747 buf->f_bfree -= block_rsv->size >> bits;
2748 else
2749 buf->f_bfree = 0;
2750 spin_unlock(&block_rsv->lock);
2751
2752 buf->f_bavail = div_u64(total_free_data, factor);
2753 ret = btrfs_calc_avail_data_space(fs_info, &total_free_data);
2754 if (ret)
2755 return ret;
2756 buf->f_bavail += div_u64(total_free_data, factor);
2757 buf->f_bavail = buf->f_bavail >> bits;
2758
2759 /*
2760 * We calculate the remaining metadata space minus global reserve. If
2761 * this is (supposedly) smaller than zero, there's no space. But this
2762 * does not hold in practice, the exhausted state happens where's still
2763 * some positive delta. So we apply some guesswork and compare the
2764 * delta to a 4M threshold. (Practically observed delta was ~2M.)
2765 *
2766 * We probably cannot calculate the exact threshold value because this
2767 * depends on the internal reservations requested by various
2768 * operations, so some operations that consume a few metadata will
2769 * succeed even if the Avail is zero. But this is better than the other
2770 * way around.
2771 */
2772 thresh = SZ_4M;
2773
2774 /*
2775 * We only want to claim there's no available space if we can no longer
2776 * allocate chunks for our metadata profile and our global reserve will
2777 * not fit in the free metadata space. If we aren't ->full then we
2778 * still can allocate chunks and thus are fine using the currently
2779 * calculated f_bavail.
2780 */
2781 if (!mixed && block_rsv->space_info->full &&
2782 (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size))
2783 buf->f_bavail = 0;
2784
2785 buf->f_type = BTRFS_SUPER_MAGIC;
2786 buf->f_bsize = dentry->d_sb->s_blocksize;
2787 buf->f_namelen = BTRFS_NAME_LEN;
2788
2789 /* We treat it as constant endianness (it doesn't matter _which_)
2790 because we want the fsid to come out the same whether mounted
2791 on a big-endian or little-endian host */
2792 buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
2793 buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
2794 /* Mask in the root object ID too, to disambiguate subvols */
2795 buf->f_fsid.val[0] ^=
2796 BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32;
2797 buf->f_fsid.val[1] ^=
2798 BTRFS_I(d_inode(dentry))->root->root_key.objectid;
2799
2800 return 0;
2801 }
2802
2803 static int btrfs_fc_test_super(struct super_block *sb, struct fs_context *fc)
2804 {
2805 struct btrfs_fs_info *p = fc->s_fs_info;
2806 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2807
2808 return fs_info->fs_devices == p->fs_devices;
2809 }
2810
2811 static int btrfs_get_tree_super(struct fs_context *fc)
2812 {
2813 struct btrfs_fs_info *fs_info = fc->s_fs_info;
2814 struct btrfs_fs_context *ctx = fc->fs_private;
2815 struct btrfs_fs_devices *fs_devices = NULL;
2816 struct block_device *bdev;
2817 struct btrfs_device *device;
2818 struct super_block *sb;
2819 blk_mode_t mode = sb_open_mode(fc->sb_flags);
2820 int ret;
2821
2822 btrfs_ctx_to_info(fs_info, ctx);
2823 mutex_lock(&uuid_mutex);
2824
2825 /*
2826 * With 'true' passed to btrfs_scan_one_device() (mount time) we expect
2827 * either a valid device or an error.
2828 */
2829 device = btrfs_scan_one_device(fc->source, mode, true);
2830 ASSERT(device != NULL);
2831 if (IS_ERR(device)) {
2832 mutex_unlock(&uuid_mutex);
2833 return PTR_ERR(device);
2834 }
2835
2836 fs_devices = device->fs_devices;
2837 fs_info->fs_devices = fs_devices;
2838
2839 ret = btrfs_open_devices(fs_devices, mode, &btrfs_fs_type);
2840 mutex_unlock(&uuid_mutex);
2841 if (ret)
2842 return ret;
2843
2844 if (!(fc->sb_flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
2845 ret = -EACCES;
2846 goto error;
2847 }
2848
2849 bdev = fs_devices->latest_dev->bdev;
2850
2851 /*
2852 * From now on the error handling is not straightforward.
2853 *
2854 * If successful, this will transfer the fs_info into the super block,
2855 * and fc->s_fs_info will be NULL. However if there's an existing
2856 * super, we'll still have fc->s_fs_info populated. If we error
2857 * completely out it'll be cleaned up when we drop the fs_context,
2858 * otherwise it's tied to the lifetime of the super_block.
2859 */
2860 sb = sget_fc(fc, btrfs_fc_test_super, set_anon_super_fc);
2861 if (IS_ERR(sb)) {
2862 ret = PTR_ERR(sb);
2863 goto error;
2864 }
2865
2866 if (sb->s_root) {
2867 btrfs_close_devices(fs_devices);
2868 if ((fc->sb_flags ^ sb->s_flags) & SB_RDONLY)
2869 ret = -EBUSY;
2870 } else {
2871 snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
2872 shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id);
2873 btrfs_sb(sb)->bdev_holder = &btrfs_fs_type;
2874 ret = btrfs_fill_super(sb, fs_devices, NULL);
2875 }
2876
2877 if (ret) {
2878 deactivate_locked_super(sb);
2879 return ret;
2880 }
2881
2882 fc->root = dget(sb->s_root);
2883 return 0;
2884
2885 error:
2886 btrfs_close_devices(fs_devices);
2887 return ret;
2888 }
2889
2890 /*
2891 * Ever since commit 0723a0473fb4 ("btrfs: allow mounting btrfs subvolumes
2892 * with different ro/rw options") the following works:
2893 *
2894 * (i) mount /dev/sda3 -o subvol=foo,ro /mnt/foo
2895 * (ii) mount /dev/sda3 -o subvol=bar,rw /mnt/bar
2896 *
2897 * which looks nice and innocent but is actually pretty intricate and deserves
2898 * a long comment.
2899 *
2900 * On another filesystem a subvolume mount is close to something like:
2901 *
2902 * (iii) # create rw superblock + initial mount
2903 * mount -t xfs /dev/sdb /opt/
2904 *
2905 * # create ro bind mount
2906 * mount --bind -o ro /opt/foo /mnt/foo
2907 *
2908 * # unmount initial mount
2909 * umount /opt
2910 *
2911 * Of course, there's some special subvolume sauce and there's the fact that the
2912 * sb->s_root dentry is really swapped after mount_subtree(). But conceptually
2913 * it's very close and will help us understand the issue.
2914 *
2915 * The old mount API didn't cleanly distinguish between a mount being made ro
2916 * and a superblock being made ro. The only way to change the ro state of
2917 * either object was by passing ms_rdonly. If a new mount was created via
2918 * mount(2) such as:
2919 *
2920 * mount("/dev/sdb", "/mnt", "xfs", ms_rdonly, null);
2921 *
2922 * the MS_RDONLY flag being specified had two effects:
2923 *
2924 * (1) MNT_READONLY was raised -> the resulting mount got
2925 * @mnt->mnt_flags |= MNT_READONLY raised.
2926 *
2927 * (2) MS_RDONLY was passed to the filesystem's mount method and the filesystems
2928 * made the superblock ro. Note, how SB_RDONLY has the same value as
2929 * ms_rdonly and is raised whenever MS_RDONLY is passed through mount(2).
2930 *
2931 * Creating a subtree mount via (iii) ends up leaving a rw superblock with a
2932 * subtree mounted ro.
2933 *
2934 * But consider the effect on the old mount API on btrfs subvolume mounting
2935 * which combines the distinct step in (iii) into a single step.
2936 *
2937 * By issuing (i) both the mount and the superblock are turned ro. Now when (ii)
2938 * is issued the superblock is ro and thus even if the mount created for (ii) is
2939 * rw it wouldn't help. Hence, btrfs needed to transition the superblock from ro
2940 * to rw for (ii) which it did using an internal remount call.
2941 *
2942 * IOW, subvolume mounting was inherently complicated due to the ambiguity of
2943 * MS_RDONLY in mount(2). Note, this ambiguity has mount(8) always translate
2944 * "ro" to MS_RDONLY. IOW, in both (i) and (ii) "ro" becomes MS_RDONLY when
2945 * passed by mount(8) to mount(2).
2946 *
2947 * Enter the new mount API. The new mount API disambiguates making a mount ro
2948 * and making a superblock ro.
2949 *
2950 * (3) To turn a mount ro the MOUNT_ATTR_ONLY flag can be used with either
2951 * fsmount() or mount_setattr() this is a pure VFS level change for a
2952 * specific mount or mount tree that is never seen by the filesystem itself.
2953 *
2954 * (4) To turn a superblock ro the "ro" flag must be used with
2955 * fsconfig(FSCONFIG_SET_FLAG, "ro"). This option is seen by the filesystem
2956 * in fc->sb_flags.
2957 *
2958 * This disambiguation has rather positive consequences. Mounting a subvolume
2959 * ro will not also turn the superblock ro. Only the mount for the subvolume
2960 * will become ro.
2961 *
2962 * So, if the superblock creation request comes from the new mount API the
2963 * caller must have explicitly done:
2964 *
2965 * fsconfig(FSCONFIG_SET_FLAG, "ro")
2966 * fsmount/mount_setattr(MOUNT_ATTR_RDONLY)
2967 *
2968 * IOW, at some point the caller must have explicitly turned the whole
2969 * superblock ro and we shouldn't just undo it like we did for the old mount
2970 * API. In any case, it lets us avoid the hack in the new mount API.
2971 *
2972 * Consequently, the remounting hack must only be used for requests originating
2973 * from the old mount API and should be marked for full deprecation so it can be
2974 * turned off in a couple of years.
2975 *
2976 * The new mount API has no reason to support this hack.
2977 */
2978 static struct vfsmount *btrfs_reconfigure_for_mount(struct fs_context *fc)
2979 {
2980 struct vfsmount *mnt;
2981 int ret;
2982 const bool ro2rw = !(fc->sb_flags & SB_RDONLY);
2983
2984 /*
2985 * We got an EBUSY because our SB_RDONLY flag didn't match the existing
2986 * super block, so invert our setting here and retry the mount so we
2987 * can get our vfsmount.
2988 */
2989 if (ro2rw)
2990 fc->sb_flags |= SB_RDONLY;
2991 else
2992 fc->sb_flags &= ~SB_RDONLY;
2993
2994 mnt = fc_mount(fc);
2995 if (IS_ERR(mnt))
2996 return mnt;
2997
2998 if (!fc->oldapi || !ro2rw)
2999 return mnt;
3000
3001 /* We need to convert to rw, call reconfigure. */
3002 fc->sb_flags &= ~SB_RDONLY;
3003 down_write(&mnt->mnt_sb->s_umount);
3004 ret = btrfs_reconfigure(fc);
3005 up_write(&mnt->mnt_sb->s_umount);
3006 if (ret) {
3007 mntput(mnt);
3008 return ERR_PTR(ret);
3009 }
3010 return mnt;
3011 }
3012
3013 static int btrfs_get_tree_subvol(struct fs_context *fc)
3014 {
3015 struct btrfs_fs_info *fs_info = NULL;
3016 struct btrfs_fs_context *ctx = fc->fs_private;
3017 struct fs_context *dup_fc;
3018 struct dentry *dentry;
3019 struct vfsmount *mnt;
3020
3021 /*
3022 * Setup a dummy root and fs_info for test/set super. This is because
3023 * we don't actually fill this stuff out until open_ctree, but we need
3024 * then open_ctree will properly initialize the file system specific
3025 * settings later. btrfs_init_fs_info initializes the static elements
3026 * of the fs_info (locks and such) to make cleanup easier if we find a
3027 * superblock with our given fs_devices later on at sget() time.
3028 */
3029 fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
3030 if (!fs_info)
3031 return -ENOMEM;
3032
3033 fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
3034 fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
3035 if (!fs_info->super_copy || !fs_info->super_for_commit) {
3036 btrfs_free_fs_info(fs_info);
3037 return -ENOMEM;
3038 }
3039 btrfs_init_fs_info(fs_info);
3040
3041 dup_fc = vfs_dup_fs_context(fc);
3042 if (IS_ERR(dup_fc)) {
3043 btrfs_free_fs_info(fs_info);
3044 return PTR_ERR(dup_fc);
3045 }
3046
3047 /*
3048 * When we do the sget_fc this gets transferred to the sb, so we only
3049 * need to set it on the dup_fc as this is what creates the super block.
3050 */
3051 dup_fc->s_fs_info = fs_info;
3052
3053 /*
3054 * We'll do the security settings in our btrfs_get_tree_super() mount
3055 * loop, they were duplicated into dup_fc, we can drop the originals
3056 * here.
3057 */
3058 security_free_mnt_opts(&fc->security);
3059 fc->security = NULL;
3060
3061 mnt = fc_mount(dup_fc);
3062 if (PTR_ERR_OR_ZERO(mnt) == -EBUSY)
3063 mnt = btrfs_reconfigure_for_mount(dup_fc);
3064 put_fs_context(dup_fc);
3065 if (IS_ERR(mnt))
3066 return PTR_ERR(mnt);
3067
3068 /*
3069 * This free's ->subvol_name, because if it isn't set we have to
3070 * allocate a buffer to hold the subvol_name, so we just drop our
3071 * reference to it here.
3072 */
3073 dentry = mount_subvol(ctx->subvol_name, ctx->subvol_objectid, mnt);
3074 ctx->subvol_name = NULL;
3075 if (IS_ERR(dentry))
3076 return PTR_ERR(dentry);
3077
3078 fc->root = dentry;
3079 return 0;
3080 }
3081
3082 static int btrfs_get_tree(struct fs_context *fc)
3083 {
3084 /*
3085 * Since we use mount_subtree to mount the default/specified subvol, we
3086 * have to do mounts in two steps.
3087 *
3088 * First pass through we call btrfs_get_tree_subvol(), this is just a
3089 * wrapper around fc_mount() to call back into here again, and this time
3090 * we'll call btrfs_get_tree_super(). This will do the open_ctree() and
3091 * everything to open the devices and file system. Then we return back
3092 * with a fully constructed vfsmount in btrfs_get_tree_subvol(), and
3093 * from there we can do our mount_subvol() call, which will lookup
3094 * whichever subvol we're mounting and setup this fc with the
3095 * appropriate dentry for the subvol.
3096 */
3097 if (fc->s_fs_info)
3098 return btrfs_get_tree_super(fc);
3099 return btrfs_get_tree_subvol(fc);
3100 }
3101
/* Tear down the super block, then release the fs_info that was attached to it. */
static void btrfs_kill_super(struct super_block *sb)
{
	/* Look the fs_info up through @sb before the super block is killed. */
	struct btrfs_fs_info *info = btrfs_sb(sb);

	kill_anon_super(sb);
	btrfs_free_fs_info(info);
}
3108
3109 static void btrfs_free_fs_context(struct fs_context *fc)
3110 {
3111 struct btrfs_fs_context *ctx = fc->fs_private;
3112 struct btrfs_fs_info *fs_info = fc->s_fs_info;
3113
3114 if (fs_info)
3115 btrfs_free_fs_info(fs_info);
3116
3117 if (ctx && refcount_dec_and_test(&ctx->refs)) {
3118 kfree(ctx->subvol_name);
3119 kfree(ctx);
3120 }
3121 }
3122
3123 static int btrfs_dup_fs_context(struct fs_context *fc, struct fs_context *src_fc)
3124 {
3125 struct btrfs_fs_context *ctx = src_fc->fs_private;
3126
3127 /*
3128 * Give a ref to our ctx to this dup, as we want to keep it around for
3129 * our original fc so we can have the subvolume name or objectid.
3130 *
3131 * We unset ->source in the original fc because the dup needs it for
3132 * mounting, and then once we free the dup it'll free ->source, so we
3133 * need to make sure we're only pointing to it in one fc.
3134 */
3135 refcount_inc(&ctx->refs);
3136 fc->fs_private = ctx;
3137 fc->source = src_fc->source;
3138 src_fc->source = NULL;
3139 return 0;
3140 }
3141
3142 static const struct fs_context_operations btrfs_fs_context_ops = {
3143 .parse_param = btrfs_parse_param,
3144 .reconfigure = btrfs_reconfigure,
3145 .get_tree = btrfs_get_tree,
3146 .dup = btrfs_dup_fs_context,
3147 .free = btrfs_free_fs_context,
3148 };
3149
3150 static int __maybe_unused btrfs_init_fs_context(struct fs_context *fc)
3151 {
3152 struct btrfs_fs_context *ctx;
3153
3154 ctx = kzalloc(sizeof(struct btrfs_fs_context), GFP_KERNEL);
3155 if (!ctx)
3156 return -ENOMEM;
3157
3158 refcount_set(&ctx->refs, 1);
3159 fc->fs_private = ctx;
3160 fc->ops = &btrfs_fs_context_ops;
3161
3162 if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
3163 btrfs_info_to_ctx(btrfs_sb(fc->root->d_sb), ctx);
3164 } else {
3165 ctx->thread_pool_size =
3166 min_t(unsigned long, num_online_cpus() + 2, 8);
3167 ctx->max_inline = BTRFS_DEFAULT_MAX_INLINE;
3168 ctx->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
3169 }
3170
3171 return 0;
3172 }
3173
3174 static struct file_system_type btrfs_fs_type = {
3175 .owner = THIS_MODULE,
3176 .name = "btrfs",
3177 .mount = btrfs_mount,
3178 .kill_sb = btrfs_kill_super,
3179 .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
3180 };
3181
3182 static struct file_system_type btrfs_root_fs_type = {
3183 .owner = THIS_MODULE,
3184 .name = "btrfs",
3185 .mount = btrfs_mount_root,
3186 .kill_sb = btrfs_kill_super,
3187 .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP,
3188 };
3189
3190 MODULE_ALIAS_FS("btrfs");
3191
3192 static int btrfs_control_open(struct inode *inode, struct file *file)
3193 {
3194 /*
3195 * The control file's private_data is used to hold the
3196 * transaction when it is started and is used to keep
3197 * track of whether a transaction is already in progress.
3198 */
3199 file->private_data = NULL;
3200 return 0;
3201 }
3202
3203 /*
3204 * Used by /dev/btrfs-control for devices ioctls.
3205 */
3206 static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
3207 unsigned long arg)
3208 {
3209 struct btrfs_ioctl_vol_args *vol;
3210 struct btrfs_device *device = NULL;
3211 dev_t devt = 0;
3212 int ret = -ENOTTY;
3213
3214 if (!capable(CAP_SYS_ADMIN))
3215 return -EPERM;
3216
3217 vol = memdup_user((void __user *)arg, sizeof(*vol));
3218 if (IS_ERR(vol))
3219 return PTR_ERR(vol);
3220 vol->name[BTRFS_PATH_NAME_MAX] = '\0';
3221
3222 switch (cmd) {
3223 case BTRFS_IOC_SCAN_DEV:
3224 mutex_lock(&uuid_mutex);
3225 /*
3226 * Scanning outside of mount can return NULL which would turn
3227 * into 0 error code.
3228 */
3229 device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ, false);
3230 ret = PTR_ERR_OR_ZERO(device);
3231 mutex_unlock(&uuid_mutex);
3232 break;
3233 case BTRFS_IOC_FORGET_DEV:
3234 if (vol->name[0] != 0) {
3235 ret = lookup_bdev(vol->name, &devt);
3236 if (ret)
3237 break;
3238 }
3239 ret = btrfs_forget_devices(devt);
3240 break;
3241 case BTRFS_IOC_DEVICES_READY:
3242 mutex_lock(&uuid_mutex);
3243 /*
3244 * Scanning outside of mount can return NULL which would turn
3245 * into 0 error code.
3246 */
3247 device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ, false);
3248 if (IS_ERR_OR_NULL(device)) {
3249 mutex_unlock(&uuid_mutex);
3250 ret = PTR_ERR(device);
3251 break;
3252 }
3253 ret = !(device->fs_devices->num_devices ==
3254 device->fs_devices->total_devices);
3255 mutex_unlock(&uuid_mutex);
3256 break;
3257 case BTRFS_IOC_GET_SUPPORTED_FEATURES:
3258 ret = btrfs_ioctl_get_supported_features((void __user*)arg);
3259 break;
3260 }
3261
3262 kfree(vol);
3263 return ret;
3264 }
3265
3266 static int btrfs_freeze(struct super_block *sb)
3267 {
3268 struct btrfs_trans_handle *trans;
3269 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
3270 struct btrfs_root *root = fs_info->tree_root;
3271
3272 set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
3273 /*
3274 * We don't need a barrier here, we'll wait for any transaction that
3275 * could be in progress on other threads (and do delayed iputs that
3276 * we want to avoid on a frozen filesystem), or do the commit
3277 * ourselves.
3278 */
3279 trans = btrfs_attach_transaction_barrier(root);
3280 if (IS_ERR(trans)) {
3281 /* no transaction, don't bother */
3282 if (PTR_ERR(trans) == -ENOENT)
3283 return 0;
3284 return PTR_ERR(trans);
3285 }
3286 return btrfs_commit_transaction(trans);
3287 }
3288
3289 static int check_dev_super(struct btrfs_device *dev)
3290 {
3291 struct btrfs_fs_info *fs_info = dev->fs_info;
3292 struct btrfs_super_block *sb;
3293 u64 last_trans;
3294 u16 csum_type;
3295 int ret = 0;
3296
3297 /* This should be called with fs still frozen. */
3298 ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags));
3299
3300 /* Missing dev, no need to check. */
3301 if (!dev->bdev)
3302 return 0;
3303
3304 /* Only need to check the primary super block. */
3305 sb = btrfs_read_dev_one_super(dev->bdev, 0, true);
3306 if (IS_ERR(sb))
3307 return PTR_ERR(sb);
3308
3309 /* Verify the checksum. */
3310 csum_type = btrfs_super_csum_type(sb);
3311 if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) {
3312 btrfs_err(fs_info, "csum type changed, has %u expect %u",
3313 csum_type, btrfs_super_csum_type(fs_info->super_copy));
3314 ret = -EUCLEAN;
3315 goto out;
3316 }
3317
3318 if (btrfs_check_super_csum(fs_info, sb)) {
3319 btrfs_err(fs_info, "csum for on-disk super block no longer matches");
3320 ret = -EUCLEAN;
3321 goto out;
3322 }
3323
3324 /* Btrfs_validate_super() includes fsid check against super->fsid. */
3325 ret = btrfs_validate_super(fs_info, sb, 0);
3326 if (ret < 0)
3327 goto out;
3328
3329 last_trans = btrfs_get_last_trans_committed(fs_info);
3330 if (btrfs_super_generation(sb) != last_trans) {
3331 btrfs_err(fs_info, "transid mismatch, has %llu expect %llu",
3332 btrfs_super_generation(sb), last_trans);
3333 ret = -EUCLEAN;
3334 goto out;
3335 }
3336 out:
3337 btrfs_release_disk_super(sb);
3338 return ret;
3339 }
3340
3341 static int btrfs_unfreeze(struct super_block *sb)
3342 {
3343 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
3344 struct btrfs_device *device;
3345 int ret = 0;
3346
3347 /*
3348 * Make sure the fs is not changed by accident (like hibernation then
3349 * modified by other OS).
3350 * If we found anything wrong, we mark the fs error immediately.
3351 *
3352 * And since the fs is frozen, no one can modify the fs yet, thus
3353 * we don't need to hold device_list_mutex.
3354 */
3355 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
3356 ret = check_dev_super(device);
3357 if (ret < 0) {
3358 btrfs_handle_fs_error(fs_info, ret,
3359 "super block on devid %llu got modified unexpectedly",
3360 device->devid);
3361 break;
3362 }
3363 }
3364 clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
3365
3366 /*
3367 * We still return 0, to allow VFS layer to unfreeze the fs even the
3368 * above checks failed. Since the fs is either fine or read-only, we're
3369 * safe to continue, without causing further damage.
3370 */
3371 return 0;
3372 }
3373
3374 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
3375 {
3376 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
3377
3378 /*
3379 * There should be always a valid pointer in latest_dev, it may be stale
3380 * for a short moment in case it's being deleted but still valid until
3381 * the end of RCU grace period.
3382 */
3383 rcu_read_lock();
3384 seq_escape(m, btrfs_dev_name(fs_info->fs_devices->latest_dev), " \t\n\\");
3385 rcu_read_unlock();
3386
3387 return 0;
3388 }
3389
3390 static const struct super_operations btrfs_super_ops = {
3391 .drop_inode = btrfs_drop_inode,
3392 .evict_inode = btrfs_evict_inode,
3393 .put_super = btrfs_put_super,
3394 .sync_fs = btrfs_sync_fs,
3395 .show_options = btrfs_show_options,
3396 .show_devname = btrfs_show_devname,
3397 .alloc_inode = btrfs_alloc_inode,
3398 .destroy_inode = btrfs_destroy_inode,
3399 .free_inode = btrfs_free_inode,
3400 .statfs = btrfs_statfs,
3401 .remount_fs = btrfs_remount,
3402 .freeze_fs = btrfs_freeze,
3403 .unfreeze_fs = btrfs_unfreeze,
3404 };
3405
3406 static const struct file_operations btrfs_ctl_fops = {
3407 .open = btrfs_control_open,
3408 .unlocked_ioctl = btrfs_control_ioctl,
3409 .compat_ioctl = compat_ptr_ioctl,
3410 .owner = THIS_MODULE,
3411 .llseek = noop_llseek,
3412 };
3413
3414 static struct miscdevice btrfs_misc = {
3415 .minor = BTRFS_MINOR,
3416 .name = "btrfs-control",
3417 .fops = &btrfs_ctl_fops
3418 };
3419
3420 MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
3421 MODULE_ALIAS("devname:btrfs-control");
3422
3423 static int __init btrfs_interface_init(void)
3424 {
3425 return misc_register(&btrfs_misc);
3426 }
3427
3428 static __cold void btrfs_interface_exit(void)
3429 {
3430 misc_deregister(&btrfs_misc);
3431 }
3432
3433 static int __init btrfs_print_mod_info(void)
3434 {
3435 static const char options[] = ""
3436 #ifdef CONFIG_BTRFS_DEBUG
3437 ", debug=on"
3438 #endif
3439 #ifdef CONFIG_BTRFS_ASSERT
3440 ", assert=on"
3441 #endif
3442 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
3443 ", ref-verify=on"
3444 #endif
3445 #ifdef CONFIG_BLK_DEV_ZONED
3446 ", zoned=yes"
3447 #else
3448 ", zoned=no"
3449 #endif
3450 #ifdef CONFIG_FS_VERITY
3451 ", fsverity=yes"
3452 #else
3453 ", fsverity=no"
3454 #endif
3455 ;
3456 pr_info("Btrfs loaded%s\n", options);
3457 return 0;
3458 }
3459
3460 static int register_btrfs(void)
3461 {
3462 return register_filesystem(&btrfs_fs_type);
3463 }
3464
3465 static void unregister_btrfs(void)
3466 {
3467 unregister_filesystem(&btrfs_fs_type);
3468 }
3469
/*
 * One step of the long module init/exit sequence: an init function paired
 * with the function that undoes it.
 */
struct init_sequence {
	/* Called during module init. */
	int (*init_func)(void);

	/* Cleanup for init_func; may be NULL if there is nothing to clean up. */
	void (*exit_func)(void);
};
3476
3477 static const struct init_sequence mod_init_seq[] = {
3478 {
3479 .init_func = btrfs_props_init,
3480 .exit_func = NULL,
3481 }, {
3482 .init_func = btrfs_init_sysfs,
3483 .exit_func = btrfs_exit_sysfs,
3484 }, {
3485 .init_func = btrfs_init_compress,
3486 .exit_func = btrfs_exit_compress,
3487 }, {
3488 .init_func = btrfs_init_cachep,
3489 .exit_func = btrfs_destroy_cachep,
3490 }, {
3491 .init_func = btrfs_transaction_init,
3492 .exit_func = btrfs_transaction_exit,
3493 }, {
3494 .init_func = btrfs_ctree_init,
3495 .exit_func = btrfs_ctree_exit,
3496 }, {
3497 .init_func = btrfs_free_space_init,
3498 .exit_func = btrfs_free_space_exit,
3499 }, {
3500 .init_func = extent_state_init_cachep,
3501 .exit_func = extent_state_free_cachep,
3502 }, {
3503 .init_func = extent_buffer_init_cachep,
3504 .exit_func = extent_buffer_free_cachep,
3505 }, {
3506 .init_func = btrfs_bioset_init,
3507 .exit_func = btrfs_bioset_exit,
3508 }, {
3509 .init_func = extent_map_init,
3510 .exit_func = extent_map_exit,
3511 }, {
3512 .init_func = ordered_data_init,
3513 .exit_func = ordered_data_exit,
3514 }, {
3515 .init_func = btrfs_delayed_inode_init,
3516 .exit_func = btrfs_delayed_inode_exit,
3517 }, {
3518 .init_func = btrfs_auto_defrag_init,
3519 .exit_func = btrfs_auto_defrag_exit,
3520 }, {
3521 .init_func = btrfs_delayed_ref_init,
3522 .exit_func = btrfs_delayed_ref_exit,
3523 }, {
3524 .init_func = btrfs_prelim_ref_init,
3525 .exit_func = btrfs_prelim_ref_exit,
3526 }, {
3527 .init_func = btrfs_interface_init,
3528 .exit_func = btrfs_interface_exit,
3529 }, {
3530 .init_func = btrfs_print_mod_info,
3531 .exit_func = NULL,
3532 }, {
3533 .init_func = btrfs_run_sanity_tests,
3534 .exit_func = NULL,
3535 }, {
3536 .init_func = register_btrfs,
3537 .exit_func = unregister_btrfs,
3538 }
3539 };
3540
3541 static bool mod_init_result[ARRAY_SIZE(mod_init_seq)];
3542
3543 static __always_inline void btrfs_exit_btrfs_fs(void)
3544 {
3545 int i;
3546
3547 for (i = ARRAY_SIZE(mod_init_seq) - 1; i >= 0; i--) {
3548 if (!mod_init_result[i])
3549 continue;
3550 if (mod_init_seq[i].exit_func)
3551 mod_init_seq[i].exit_func();
3552 mod_init_result[i] = false;
3553 }
3554 }
3555
3556 static void __exit exit_btrfs_fs(void)
3557 {
3558 btrfs_exit_btrfs_fs();
3559 btrfs_cleanup_fs_uuids();
3560 }
3561
3562 static int __init init_btrfs_fs(void)
3563 {
3564 int ret;
3565 int i;
3566
3567 for (i = 0; i < ARRAY_SIZE(mod_init_seq); i++) {
3568 ASSERT(!mod_init_result[i]);
3569 ret = mod_init_seq[i].init_func();
3570 if (ret < 0) {
3571 btrfs_exit_btrfs_fs();
3572 return ret;
3573 }
3574 mod_init_result[i] = true;
3575 }
3576 return 0;
3577 }
3578
3579 late_initcall(init_btrfs_fs);
3580 module_exit(exit_btrfs_fs)
3581
3582 MODULE_LICENSE("GPL");
3583 MODULE_SOFTDEP("pre: crc32c");
3584 MODULE_SOFTDEP("pre: xxhash64");
3585 MODULE_SOFTDEP("pre: sha256");
3586 MODULE_SOFTDEP("pre: blake2b-256");