1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
6 #include "libxfs_priv.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_inode.h"
17 #include "xfs_ialloc.h"
18 #include "xfs_alloc.h"
19 #include "xfs_trace.h"
20 #include "xfs_cksum.h"
21 #include "xfs_trans.h"
22 #include "xfs_bmap_btree.h"
23 #include "xfs_alloc_btree.h"
24 #include "xfs_ialloc_btree.h"
25 #include "xfs_rmap_btree.h"
27 #include "xfs_refcount_btree.h"
28 #include "xfs_da_format.h"
29 #include "xfs_da_btree.h"
32 * Physical superblock buffer manipulations. Shared with libxfs in userspace.
36 * Reference counting access wrappers to the perag structures.
37 * Because we never free per-ag structures, the only thing we
38 * have to protect against changes is the tree structure itself.
45 struct xfs_perag
*pag
;
49 pag
= radix_tree_lookup(&mp
->m_perag_tree
, agno
);
51 ASSERT(atomic_read(&pag
->pag_ref
) >= 0);
52 ref
= atomic_inc_return(&pag
->pag_ref
);
55 trace_xfs_perag_get(mp
, agno
, ref
, _RET_IP_
);
60 * search from @first to find the next perag with the given tag set.
68 struct xfs_perag
*pag
;
73 found
= radix_tree_gang_lookup_tag(&mp
->m_perag_tree
,
74 (void **)&pag
, first
, 1, tag
);
79 ref
= atomic_inc_return(&pag
->pag_ref
);
81 trace_xfs_perag_get_tag(mp
, pag
->pag_agno
, ref
, _RET_IP_
);
87 struct xfs_perag
*pag
)
91 ASSERT(atomic_read(&pag
->pag_ref
) > 0);
92 ref
= atomic_dec_return(&pag
->pag_ref
);
93 trace_xfs_perag_put(pag
->pag_mount
, pag
->pag_agno
, ref
, _RET_IP_
);
96 /* Check all the superblock fields we care about when reading one in. */
102 if (XFS_SB_VERSION_NUM(sbp
) != XFS_SB_VERSION_5
)
106 * Version 5 superblock feature mask validation. Reject combinations
107 * the kernel cannot support up front before checking anything else.
109 if (xfs_sb_has_compat_feature(sbp
, XFS_SB_FEAT_COMPAT_UNKNOWN
)) {
111 "Superblock has unknown compatible features (0x%x) enabled.",
112 (sbp
->sb_features_compat
& XFS_SB_FEAT_COMPAT_UNKNOWN
));
114 "Using a more recent kernel is recommended.");
117 if (xfs_sb_has_ro_compat_feature(sbp
, XFS_SB_FEAT_RO_COMPAT_UNKNOWN
)) {
119 "Superblock has unknown read-only compatible features (0x%x) enabled.",
120 (sbp
->sb_features_ro_compat
&
121 XFS_SB_FEAT_RO_COMPAT_UNKNOWN
));
122 if (!(mp
->m_flags
& XFS_MOUNT_RDONLY
)) {
124 "Attempted to mount read-only compatible filesystem read-write.");
126 "Filesystem can only be safely mounted read only.");
131 if (xfs_sb_has_incompat_feature(sbp
, XFS_SB_FEAT_INCOMPAT_UNKNOWN
)) {
133 "Superblock has unknown incompatible features (0x%x) enabled.",
134 (sbp
->sb_features_incompat
&
135 XFS_SB_FEAT_INCOMPAT_UNKNOWN
));
137 "Filesystem cannot be safely mounted by this kernel.");
144 /* Check all the superblock fields we care about when writing one out. */
146 xfs_validate_sb_write(
147 struct xfs_mount
*mp
,
152 * Carry out additional sb summary counter sanity checks when we write
153 * the superblock. We skip this in the read validator because there
154 * could be newer superblocks in the log and if the values are garbage
155 * even after replay we'll recalculate them at the end of log mount.
157 * mkfs has traditionally written zeroed counters to inprogress and
158 * secondary superblocks, so allow this usage to continue because
159 * we never read counters from such superblocks.
161 if (XFS_BUF_ADDR(bp
) == XFS_SB_DADDR
&& !sbp
->sb_inprogress
&&
162 (sbp
->sb_fdblocks
> sbp
->sb_dblocks
||
163 !xfs_verify_icount(mp
, sbp
->sb_icount
) ||
164 sbp
->sb_ifree
> sbp
->sb_icount
)) {
165 xfs_warn(mp
, "SB summary counter sanity check failed");
166 return -EFSCORRUPTED
;
169 if (XFS_SB_VERSION_NUM(sbp
) != XFS_SB_VERSION_5
)
173 * Version 5 superblock feature mask validation. Reject combinations
174 * the kernel cannot support since we checked for unsupported bits in
175 * the read verifier, which means that memory is corrupt.
177 if (xfs_sb_has_compat_feature(sbp
, XFS_SB_FEAT_COMPAT_UNKNOWN
)) {
179 "Corruption detected in superblock compatible features (0x%x)!",
180 (sbp
->sb_features_compat
& XFS_SB_FEAT_COMPAT_UNKNOWN
));
181 return -EFSCORRUPTED
;
184 if (xfs_sb_has_ro_compat_feature(sbp
, XFS_SB_FEAT_RO_COMPAT_UNKNOWN
)) {
186 "Corruption detected in superblock read-only compatible features (0x%x)!",
187 (sbp
->sb_features_ro_compat
&
188 XFS_SB_FEAT_RO_COMPAT_UNKNOWN
));
189 return -EFSCORRUPTED
;
191 if (xfs_sb_has_incompat_feature(sbp
, XFS_SB_FEAT_INCOMPAT_UNKNOWN
)) {
193 "Corruption detected in superblock incompatible features (0x%x)!",
194 (sbp
->sb_features_incompat
&
195 XFS_SB_FEAT_INCOMPAT_UNKNOWN
));
196 return -EFSCORRUPTED
;
198 if (xfs_sb_has_incompat_log_feature(sbp
,
199 XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN
)) {
201 "Corruption detected in superblock incompatible log features (0x%x)!",
202 (sbp
->sb_features_log_incompat
&
203 XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN
));
204 return -EFSCORRUPTED
;
208 * We can't read verify the sb LSN because the read verifier is called
209 * before the log is allocated and processed. We know the log is set up
210 * before write verifier calls, so check it here.
212 if (!xfs_log_check_lsn(mp
, sbp
->sb_lsn
))
213 return -EFSCORRUPTED
;
218 /* Check the validity of the SB. */
220 xfs_validate_sb_common(
221 struct xfs_mount
*mp
,
225 struct xfs_dsb
*dsb
= XFS_BUF_TO_SBP(bp
);
226 uint32_t agcount
= 0;
229 if (!xfs_verify_magic(bp
, dsb
->sb_magicnum
)) {
230 xfs_warn(mp
, "bad magic number");
234 if (!xfs_sb_good_version(sbp
)) {
235 xfs_warn(mp
, "bad version");
239 if (xfs_sb_version_has_pquotino(sbp
)) {
240 if (sbp
->sb_qflags
& (XFS_OQUOTA_ENFD
| XFS_OQUOTA_CHKD
)) {
242 "Version 5 of Super block has XFS_OQUOTA bits.");
243 return -EFSCORRUPTED
;
245 } else if (sbp
->sb_qflags
& (XFS_PQUOTA_ENFD
| XFS_GQUOTA_ENFD
|
246 XFS_PQUOTA_CHKD
| XFS_GQUOTA_CHKD
)) {
248 "Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.");
249 return -EFSCORRUPTED
;
253 * Full inode chunks must be aligned to inode chunk size when
254 * sparse inodes are enabled to support the sparse chunk
255 * allocation algorithm and prevent overlapping inode records.
257 if (xfs_sb_version_hassparseinodes(sbp
)) {
260 align
= XFS_INODES_PER_CHUNK
* sbp
->sb_inodesize
262 if (sbp
->sb_inoalignmt
!= align
) {
264 "Inode block alignment (%u) must match chunk size (%u) for sparse inodes.",
265 sbp
->sb_inoalignmt
, align
);
271 sbp
->sb_logstart
== 0 && mp
->m_logdev_targp
== mp
->m_ddev_targp
)) {
273 "filesystem is marked as having an external log; "
274 "specify logdev on the mount command line.");
279 sbp
->sb_logstart
!= 0 && mp
->m_logdev_targp
!= mp
->m_ddev_targp
)) {
281 "filesystem is marked as having an internal log; "
282 "do not specify logdev on the mount command line.");
286 /* Compute agcount for this number of dblocks and agblocks */
287 if (sbp
->sb_agblocks
) {
288 agcount
= div_u64_rem(sbp
->sb_dblocks
, sbp
->sb_agblocks
, &rem
);
294 * More sanity checking. Most of these were stolen directly from
298 sbp
->sb_agcount
<= 0 ||
299 sbp
->sb_sectsize
< XFS_MIN_SECTORSIZE
||
300 sbp
->sb_sectsize
> XFS_MAX_SECTORSIZE
||
301 sbp
->sb_sectlog
< XFS_MIN_SECTORSIZE_LOG
||
302 sbp
->sb_sectlog
> XFS_MAX_SECTORSIZE_LOG
||
303 sbp
->sb_sectsize
!= (1 << sbp
->sb_sectlog
) ||
304 sbp
->sb_blocksize
< XFS_MIN_BLOCKSIZE
||
305 sbp
->sb_blocksize
> XFS_MAX_BLOCKSIZE
||
306 sbp
->sb_blocklog
< XFS_MIN_BLOCKSIZE_LOG
||
307 sbp
->sb_blocklog
> XFS_MAX_BLOCKSIZE_LOG
||
308 sbp
->sb_blocksize
!= (1 << sbp
->sb_blocklog
) ||
309 sbp
->sb_dirblklog
+ sbp
->sb_blocklog
> XFS_MAX_BLOCKSIZE_LOG
||
310 sbp
->sb_inodesize
< XFS_DINODE_MIN_SIZE
||
311 sbp
->sb_inodesize
> XFS_DINODE_MAX_SIZE
||
312 sbp
->sb_inodelog
< XFS_DINODE_MIN_LOG
||
313 sbp
->sb_inodelog
> XFS_DINODE_MAX_LOG
||
314 sbp
->sb_inodesize
!= (1 << sbp
->sb_inodelog
) ||
315 sbp
->sb_logsunit
> XLOG_MAX_RECORD_BSIZE
||
316 sbp
->sb_inopblock
!= howmany(sbp
->sb_blocksize
,sbp
->sb_inodesize
) ||
317 XFS_FSB_TO_B(mp
, sbp
->sb_agblocks
) < XFS_MIN_AG_BYTES
||
318 XFS_FSB_TO_B(mp
, sbp
->sb_agblocks
) > XFS_MAX_AG_BYTES
||
319 sbp
->sb_agblklog
!= xfs_highbit32(sbp
->sb_agblocks
- 1) + 1 ||
320 agcount
== 0 || agcount
!= sbp
->sb_agcount
||
321 (sbp
->sb_blocklog
- sbp
->sb_inodelog
!= sbp
->sb_inopblog
) ||
322 (sbp
->sb_rextsize
* sbp
->sb_blocksize
> XFS_MAX_RTEXTSIZE
) ||
323 (sbp
->sb_rextsize
* sbp
->sb_blocksize
< XFS_MIN_RTEXTSIZE
) ||
324 (sbp
->sb_imax_pct
> 100 /* zero sb_imax_pct is valid */) ||
325 sbp
->sb_dblocks
== 0 ||
326 sbp
->sb_dblocks
> XFS_MAX_DBLOCKS(sbp
) ||
327 sbp
->sb_dblocks
< XFS_MIN_DBLOCKS(sbp
) ||
328 sbp
->sb_shared_vn
!= 0)) {
329 xfs_notice(mp
, "SB sanity check failed");
330 return -EFSCORRUPTED
;
334 if (!xfs_sb_version_hasdalign(sbp
) ||
335 sbp
->sb_unit
> sbp
->sb_width
||
336 (sbp
->sb_width
% sbp
->sb_unit
) != 0) {
337 xfs_notice(mp
, "SB stripe unit sanity check failed");
338 return -EFSCORRUPTED
;
340 } else if (xfs_sb_version_hasdalign(sbp
)) {
341 xfs_notice(mp
, "SB stripe alignment sanity check failed");
342 return -EFSCORRUPTED
;
343 } else if (sbp
->sb_width
) {
344 xfs_notice(mp
, "SB stripe width sanity check failed");
345 return -EFSCORRUPTED
;
349 if (xfs_sb_version_hascrc(&mp
->m_sb
) &&
350 sbp
->sb_blocksize
< XFS_MIN_CRC_BLOCKSIZE
) {
351 xfs_notice(mp
, "v5 SB sanity check failed");
352 return -EFSCORRUPTED
;
356 * Currently only very few inode sizes are supported.
358 switch (sbp
->sb_inodesize
) {
365 xfs_warn(mp
, "inode size of %d bytes not supported",
370 if (xfs_sb_validate_fsb_count(sbp
, sbp
->sb_dblocks
) ||
371 xfs_sb_validate_fsb_count(sbp
, sbp
->sb_rblocks
)) {
373 "file system too large to be mounted on this system.");
381 xfs_sb_quota_from_disk(struct xfs_sb
*sbp
)
384 * older mkfs doesn't initialize quota inodes to NULLFSINO. This
385 * leads to in-core values having two different values for a quota
386 * inode to be invalid: 0 and NULLFSINO. Change it to a single value
389 * Note that this change affects only the in-core values. These
390 * values are not written back to disk unless any quota information
391 * is written to the disk. Even in that case, sb_pquotino field is
392 * not written to disk unless the superblock supports pquotino.
394 if (sbp
->sb_uquotino
== 0)
395 sbp
->sb_uquotino
= NULLFSINO
;
396 if (sbp
->sb_gquotino
== 0)
397 sbp
->sb_gquotino
= NULLFSINO
;
398 if (sbp
->sb_pquotino
== 0)
399 sbp
->sb_pquotino
= NULLFSINO
;
402 * We need to do these manipulations only if we are working
403 * with an older version of on-disk superblock.
405 if (xfs_sb_version_has_pquotino(sbp
))
408 if (sbp
->sb_qflags
& XFS_OQUOTA_ENFD
)
409 sbp
->sb_qflags
|= (sbp
->sb_qflags
& XFS_PQUOTA_ACCT
) ?
410 XFS_PQUOTA_ENFD
: XFS_GQUOTA_ENFD
;
411 if (sbp
->sb_qflags
& XFS_OQUOTA_CHKD
)
412 sbp
->sb_qflags
|= (sbp
->sb_qflags
& XFS_PQUOTA_ACCT
) ?
413 XFS_PQUOTA_CHKD
: XFS_GQUOTA_CHKD
;
414 sbp
->sb_qflags
&= ~(XFS_OQUOTA_ENFD
| XFS_OQUOTA_CHKD
);
416 if (sbp
->sb_qflags
& XFS_PQUOTA_ACCT
&&
417 sbp
->sb_gquotino
!= NULLFSINO
) {
419 * In older version of superblock, on-disk superblock only
420 * has sb_gquotino, and in-core superblock has both sb_gquotino
421 * and sb_pquotino. But, only one of them is supported at any
422 * point of time. So, if PQUOTA is set in disk superblock,
423 * copy over sb_gquotino to sb_pquotino. The NULLFSINO test
424 * above is to make sure we don't do this twice and wipe them
427 sbp
->sb_pquotino
= sbp
->sb_gquotino
;
428 sbp
->sb_gquotino
= NULLFSINO
;
438 to
->sb_magicnum
= be32_to_cpu(from
->sb_magicnum
);
439 to
->sb_blocksize
= be32_to_cpu(from
->sb_blocksize
);
440 to
->sb_dblocks
= be64_to_cpu(from
->sb_dblocks
);
441 to
->sb_rblocks
= be64_to_cpu(from
->sb_rblocks
);
442 to
->sb_rextents
= be64_to_cpu(from
->sb_rextents
);
443 memcpy(&to
->sb_uuid
, &from
->sb_uuid
, sizeof(to
->sb_uuid
));
444 to
->sb_logstart
= be64_to_cpu(from
->sb_logstart
);
445 to
->sb_rootino
= be64_to_cpu(from
->sb_rootino
);
446 to
->sb_rbmino
= be64_to_cpu(from
->sb_rbmino
);
447 to
->sb_rsumino
= be64_to_cpu(from
->sb_rsumino
);
448 to
->sb_rextsize
= be32_to_cpu(from
->sb_rextsize
);
449 to
->sb_agblocks
= be32_to_cpu(from
->sb_agblocks
);
450 to
->sb_agcount
= be32_to_cpu(from
->sb_agcount
);
451 to
->sb_rbmblocks
= be32_to_cpu(from
->sb_rbmblocks
);
452 to
->sb_logblocks
= be32_to_cpu(from
->sb_logblocks
);
453 to
->sb_versionnum
= be16_to_cpu(from
->sb_versionnum
);
454 to
->sb_sectsize
= be16_to_cpu(from
->sb_sectsize
);
455 to
->sb_inodesize
= be16_to_cpu(from
->sb_inodesize
);
456 to
->sb_inopblock
= be16_to_cpu(from
->sb_inopblock
);
457 memcpy(&to
->sb_fname
, &from
->sb_fname
, sizeof(to
->sb_fname
));
458 to
->sb_blocklog
= from
->sb_blocklog
;
459 to
->sb_sectlog
= from
->sb_sectlog
;
460 to
->sb_inodelog
= from
->sb_inodelog
;
461 to
->sb_inopblog
= from
->sb_inopblog
;
462 to
->sb_agblklog
= from
->sb_agblklog
;
463 to
->sb_rextslog
= from
->sb_rextslog
;
464 to
->sb_inprogress
= from
->sb_inprogress
;
465 to
->sb_imax_pct
= from
->sb_imax_pct
;
466 to
->sb_icount
= be64_to_cpu(from
->sb_icount
);
467 to
->sb_ifree
= be64_to_cpu(from
->sb_ifree
);
468 to
->sb_fdblocks
= be64_to_cpu(from
->sb_fdblocks
);
469 to
->sb_frextents
= be64_to_cpu(from
->sb_frextents
);
470 to
->sb_uquotino
= be64_to_cpu(from
->sb_uquotino
);
471 to
->sb_gquotino
= be64_to_cpu(from
->sb_gquotino
);
472 to
->sb_qflags
= be16_to_cpu(from
->sb_qflags
);
473 to
->sb_flags
= from
->sb_flags
;
474 to
->sb_shared_vn
= from
->sb_shared_vn
;
475 to
->sb_inoalignmt
= be32_to_cpu(from
->sb_inoalignmt
);
476 to
->sb_unit
= be32_to_cpu(from
->sb_unit
);
477 to
->sb_width
= be32_to_cpu(from
->sb_width
);
478 to
->sb_dirblklog
= from
->sb_dirblklog
;
479 to
->sb_logsectlog
= from
->sb_logsectlog
;
480 to
->sb_logsectsize
= be16_to_cpu(from
->sb_logsectsize
);
481 to
->sb_logsunit
= be32_to_cpu(from
->sb_logsunit
);
482 to
->sb_features2
= be32_to_cpu(from
->sb_features2
);
483 to
->sb_bad_features2
= be32_to_cpu(from
->sb_bad_features2
);
484 to
->sb_features_compat
= be32_to_cpu(from
->sb_features_compat
);
485 to
->sb_features_ro_compat
= be32_to_cpu(from
->sb_features_ro_compat
);
486 to
->sb_features_incompat
= be32_to_cpu(from
->sb_features_incompat
);
487 to
->sb_features_log_incompat
=
488 be32_to_cpu(from
->sb_features_log_incompat
);
489 /* crc is only used on disk, not in memory; just init to 0 here. */
491 to
->sb_spino_align
= be32_to_cpu(from
->sb_spino_align
);
492 to
->sb_pquotino
= be64_to_cpu(from
->sb_pquotino
);
493 to
->sb_lsn
= be64_to_cpu(from
->sb_lsn
);
495 * sb_meta_uuid is only on disk if it differs from sb_uuid and the
496 * feature flag is set; if not set we keep it only in memory.
498 if (xfs_sb_version_hasmetauuid(to
))
499 uuid_copy(&to
->sb_meta_uuid
, &from
->sb_meta_uuid
);
501 uuid_copy(&to
->sb_meta_uuid
, &from
->sb_uuid
);
502 /* Convert on-disk flags to in-memory flags? */
504 xfs_sb_quota_from_disk(to
);
512 __xfs_sb_from_disk(to
, from
, true);
516 xfs_sb_quota_to_disk(
520 uint16_t qflags
= from
->sb_qflags
;
522 to
->sb_uquotino
= cpu_to_be64(from
->sb_uquotino
);
523 if (xfs_sb_version_has_pquotino(from
)) {
524 to
->sb_qflags
= cpu_to_be16(from
->sb_qflags
);
525 to
->sb_gquotino
= cpu_to_be64(from
->sb_gquotino
);
526 to
->sb_pquotino
= cpu_to_be64(from
->sb_pquotino
);
531 * The in-core version of sb_qflags does not have XFS_OQUOTA_*
532 * flags, whereas the on-disk version does. So, convert incore
533 * XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags.
535 qflags
&= ~(XFS_PQUOTA_ENFD
| XFS_PQUOTA_CHKD
|
536 XFS_GQUOTA_ENFD
| XFS_GQUOTA_CHKD
);
538 if (from
->sb_qflags
&
539 (XFS_PQUOTA_ENFD
| XFS_GQUOTA_ENFD
))
540 qflags
|= XFS_OQUOTA_ENFD
;
541 if (from
->sb_qflags
&
542 (XFS_PQUOTA_CHKD
| XFS_GQUOTA_CHKD
))
543 qflags
|= XFS_OQUOTA_CHKD
;
544 to
->sb_qflags
= cpu_to_be16(qflags
);
547 * GQUOTINO and PQUOTINO cannot be used together in versions
548 * of superblock that do not have pquotino. from->sb_qflags
549 * tells us which quota is active and should be copied to
550 * disk. If neither are active, we should NULL the inode.
552 * In all cases, the separate pquotino must remain 0 because it
553 * is beyond the "end" of the valid non-pquotino superblock.
555 if (from
->sb_qflags
& XFS_GQUOTA_ACCT
)
556 to
->sb_gquotino
= cpu_to_be64(from
->sb_gquotino
);
557 else if (from
->sb_qflags
& XFS_PQUOTA_ACCT
)
558 to
->sb_gquotino
= cpu_to_be64(from
->sb_pquotino
);
561 * We can't rely on just the fields being logged to tell us
562 * that it is safe to write NULLFSINO - we should only do that
563 * if quotas are not actually enabled. Hence only write
564 * NULLFSINO if both in-core quota inodes are NULL.
566 if (from
->sb_gquotino
== NULLFSINO
&&
567 from
->sb_pquotino
== NULLFSINO
)
568 to
->sb_gquotino
= cpu_to_be64(NULLFSINO
);
579 xfs_sb_quota_to_disk(to
, from
);
581 to
->sb_magicnum
= cpu_to_be32(from
->sb_magicnum
);
582 to
->sb_blocksize
= cpu_to_be32(from
->sb_blocksize
);
583 to
->sb_dblocks
= cpu_to_be64(from
->sb_dblocks
);
584 to
->sb_rblocks
= cpu_to_be64(from
->sb_rblocks
);
585 to
->sb_rextents
= cpu_to_be64(from
->sb_rextents
);
586 memcpy(&to
->sb_uuid
, &from
->sb_uuid
, sizeof(to
->sb_uuid
));
587 to
->sb_logstart
= cpu_to_be64(from
->sb_logstart
);
588 to
->sb_rootino
= cpu_to_be64(from
->sb_rootino
);
589 to
->sb_rbmino
= cpu_to_be64(from
->sb_rbmino
);
590 to
->sb_rsumino
= cpu_to_be64(from
->sb_rsumino
);
591 to
->sb_rextsize
= cpu_to_be32(from
->sb_rextsize
);
592 to
->sb_agblocks
= cpu_to_be32(from
->sb_agblocks
);
593 to
->sb_agcount
= cpu_to_be32(from
->sb_agcount
);
594 to
->sb_rbmblocks
= cpu_to_be32(from
->sb_rbmblocks
);
595 to
->sb_logblocks
= cpu_to_be32(from
->sb_logblocks
);
596 to
->sb_versionnum
= cpu_to_be16(from
->sb_versionnum
);
597 to
->sb_sectsize
= cpu_to_be16(from
->sb_sectsize
);
598 to
->sb_inodesize
= cpu_to_be16(from
->sb_inodesize
);
599 to
->sb_inopblock
= cpu_to_be16(from
->sb_inopblock
);
600 memcpy(&to
->sb_fname
, &from
->sb_fname
, sizeof(to
->sb_fname
));
601 to
->sb_blocklog
= from
->sb_blocklog
;
602 to
->sb_sectlog
= from
->sb_sectlog
;
603 to
->sb_inodelog
= from
->sb_inodelog
;
604 to
->sb_inopblog
= from
->sb_inopblog
;
605 to
->sb_agblklog
= from
->sb_agblklog
;
606 to
->sb_rextslog
= from
->sb_rextslog
;
607 to
->sb_inprogress
= from
->sb_inprogress
;
608 to
->sb_imax_pct
= from
->sb_imax_pct
;
609 to
->sb_icount
= cpu_to_be64(from
->sb_icount
);
610 to
->sb_ifree
= cpu_to_be64(from
->sb_ifree
);
611 to
->sb_fdblocks
= cpu_to_be64(from
->sb_fdblocks
);
612 to
->sb_frextents
= cpu_to_be64(from
->sb_frextents
);
614 to
->sb_flags
= from
->sb_flags
;
615 to
->sb_shared_vn
= from
->sb_shared_vn
;
616 to
->sb_inoalignmt
= cpu_to_be32(from
->sb_inoalignmt
);
617 to
->sb_unit
= cpu_to_be32(from
->sb_unit
);
618 to
->sb_width
= cpu_to_be32(from
->sb_width
);
619 to
->sb_dirblklog
= from
->sb_dirblklog
;
620 to
->sb_logsectlog
= from
->sb_logsectlog
;
621 to
->sb_logsectsize
= cpu_to_be16(from
->sb_logsectsize
);
622 to
->sb_logsunit
= cpu_to_be32(from
->sb_logsunit
);
625 * We need to ensure that bad_features2 always matches features2.
626 * Hence we enforce that here rather than having to remember to do it
627 * everywhere else that updates features2.
629 from
->sb_bad_features2
= from
->sb_features2
;
630 to
->sb_features2
= cpu_to_be32(from
->sb_features2
);
631 to
->sb_bad_features2
= cpu_to_be32(from
->sb_bad_features2
);
633 if (xfs_sb_version_hascrc(from
)) {
634 to
->sb_features_compat
= cpu_to_be32(from
->sb_features_compat
);
635 to
->sb_features_ro_compat
=
636 cpu_to_be32(from
->sb_features_ro_compat
);
637 to
->sb_features_incompat
=
638 cpu_to_be32(from
->sb_features_incompat
);
639 to
->sb_features_log_incompat
=
640 cpu_to_be32(from
->sb_features_log_incompat
);
641 to
->sb_spino_align
= cpu_to_be32(from
->sb_spino_align
);
642 to
->sb_lsn
= cpu_to_be64(from
->sb_lsn
);
643 if (xfs_sb_version_hasmetauuid(from
))
644 uuid_copy(&to
->sb_meta_uuid
, &from
->sb_meta_uuid
);
649 * If the superblock has the CRC feature bit set or the CRC field is non-null,
650 * check that the CRC is valid. We check the CRC field is non-null because a
651 * single bit error could clear the feature bit and unused parts of the
652 * superblock are supposed to be zero. Hence a non-null crc field indicates that
653 * we've potentially lost a feature bit and we should check it anyway.
655 * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the
656 * last field in V4 secondary superblocks. So for secondary superblocks,
657 * we are more forgiving, and ignore CRC failures if the primary doesn't
658 * indicate that the fs version is V5.
665 struct xfs_mount
*mp
= bp
->b_target
->bt_mount
;
666 struct xfs_dsb
*dsb
= XFS_BUF_TO_SBP(bp
);
670 * open code the version check to avoid needing to convert the entire
671 * superblock from disk order just to check the version number
673 if (dsb
->sb_magicnum
== cpu_to_be32(XFS_SB_MAGIC
) &&
674 (((be16_to_cpu(dsb
->sb_versionnum
) & XFS_SB_VERSION_NUMBITS
) ==
678 if (!xfs_buf_verify_cksum(bp
, XFS_SB_CRC_OFF
)) {
679 /* Only fail bad secondaries on a known V5 filesystem */
680 if (bp
->b_bn
== XFS_SB_DADDR
||
681 xfs_sb_version_hascrc(&mp
->m_sb
)) {
689 * Check all the superblock fields. Don't byteswap the xquota flags
690 * because xfs_validate_sb_common checks the on-disk values.
692 __xfs_sb_from_disk(&sb
, XFS_BUF_TO_SBP(bp
), false);
693 error
= xfs_validate_sb_common(mp
, bp
, &sb
);
696 error
= xfs_validate_sb_read(mp
, &sb
);
699 if (error
== -EFSCORRUPTED
|| error
== -EFSBADCRC
)
700 xfs_verifier_error(bp
, error
, __this_address
);
702 xfs_buf_ioerror(bp
, error
);
706 * We may be probed for a filesystem match, so we may not want to emit
707 * messages when the superblock buffer is not actually an XFS superblock.
708 * If we find an XFS superblock, then run a normal, noisy mount because we are
709 * really going to mount it and want to know about errors.
712 xfs_sb_quiet_read_verify(
715 struct xfs_dsb
*dsb
= XFS_BUF_TO_SBP(bp
);
717 if (dsb
->sb_magicnum
== cpu_to_be32(XFS_SB_MAGIC
)) {
718 /* XFS filesystem, verify noisily! */
719 xfs_sb_read_verify(bp
);
723 xfs_buf_ioerror(bp
, -EWRONGFS
);
731 struct xfs_mount
*mp
= bp
->b_target
->bt_mount
;
732 struct xfs_buf_log_item
*bip
= bp
->b_log_item
;
736 * Check all the superblock fields. Don't byteswap the xquota flags
737 * because xfs_validate_sb_common checks the on-disk values.
739 __xfs_sb_from_disk(&sb
, XFS_BUF_TO_SBP(bp
), false);
740 error
= xfs_validate_sb_common(mp
, bp
, &sb
);
743 error
= xfs_validate_sb_write(mp
, bp
, &sb
);
747 if (!xfs_sb_version_hascrc(&mp
->m_sb
))
751 XFS_BUF_TO_SBP(bp
)->sb_lsn
= cpu_to_be64(bip
->bli_item
.li_lsn
);
753 xfs_buf_update_cksum(bp
, XFS_SB_CRC_OFF
);
757 xfs_verifier_error(bp
, error
, __this_address
);
760 const struct xfs_buf_ops xfs_sb_buf_ops
= {
762 .magic
= { cpu_to_be32(XFS_SB_MAGIC
), cpu_to_be32(XFS_SB_MAGIC
) },
763 .verify_read
= xfs_sb_read_verify
,
764 .verify_write
= xfs_sb_write_verify
,
767 const struct xfs_buf_ops xfs_sb_quiet_buf_ops
= {
768 .name
= "xfs_sb_quiet",
769 .magic
= { cpu_to_be32(XFS_SB_MAGIC
), cpu_to_be32(XFS_SB_MAGIC
) },
770 .verify_read
= xfs_sb_quiet_read_verify
,
771 .verify_write
= xfs_sb_write_verify
,
777 * Mount initialization code establishing various mount
778 * fields from the superblock associated with the given
783 struct xfs_mount
*mp
,
786 mp
->m_agfrotor
= mp
->m_agirotor
= 0;
787 mp
->m_maxagi
= mp
->m_sb
.sb_agcount
;
788 mp
->m_blkbit_log
= sbp
->sb_blocklog
+ XFS_NBBYLOG
;
789 mp
->m_blkbb_log
= sbp
->sb_blocklog
- BBSHIFT
;
790 mp
->m_sectbb_log
= sbp
->sb_sectlog
- BBSHIFT
;
791 mp
->m_agno_log
= xfs_highbit32(sbp
->sb_agcount
- 1) + 1;
792 mp
->m_agino_log
= sbp
->sb_inopblog
+ sbp
->sb_agblklog
;
793 mp
->m_blockmask
= sbp
->sb_blocksize
- 1;
794 mp
->m_blockwsize
= sbp
->sb_blocksize
>> XFS_WORDLOG
;
795 mp
->m_blockwmask
= mp
->m_blockwsize
- 1;
797 mp
->m_alloc_mxr
[0] = xfs_allocbt_maxrecs(mp
, sbp
->sb_blocksize
, 1);
798 mp
->m_alloc_mxr
[1] = xfs_allocbt_maxrecs(mp
, sbp
->sb_blocksize
, 0);
799 mp
->m_alloc_mnr
[0] = mp
->m_alloc_mxr
[0] / 2;
800 mp
->m_alloc_mnr
[1] = mp
->m_alloc_mxr
[1] / 2;
802 mp
->m_inobt_mxr
[0] = xfs_inobt_maxrecs(mp
, sbp
->sb_blocksize
, 1);
803 mp
->m_inobt_mxr
[1] = xfs_inobt_maxrecs(mp
, sbp
->sb_blocksize
, 0);
804 mp
->m_inobt_mnr
[0] = mp
->m_inobt_mxr
[0] / 2;
805 mp
->m_inobt_mnr
[1] = mp
->m_inobt_mxr
[1] / 2;
807 mp
->m_bmap_dmxr
[0] = xfs_bmbt_maxrecs(mp
, sbp
->sb_blocksize
, 1);
808 mp
->m_bmap_dmxr
[1] = xfs_bmbt_maxrecs(mp
, sbp
->sb_blocksize
, 0);
809 mp
->m_bmap_dmnr
[0] = mp
->m_bmap_dmxr
[0] / 2;
810 mp
->m_bmap_dmnr
[1] = mp
->m_bmap_dmxr
[1] / 2;
812 mp
->m_rmap_mxr
[0] = xfs_rmapbt_maxrecs(sbp
->sb_blocksize
, 1);
813 mp
->m_rmap_mxr
[1] = xfs_rmapbt_maxrecs(sbp
->sb_blocksize
, 0);
814 mp
->m_rmap_mnr
[0] = mp
->m_rmap_mxr
[0] / 2;
815 mp
->m_rmap_mnr
[1] = mp
->m_rmap_mxr
[1] / 2;
817 mp
->m_refc_mxr
[0] = xfs_refcountbt_maxrecs(sbp
->sb_blocksize
, true);
818 mp
->m_refc_mxr
[1] = xfs_refcountbt_maxrecs(sbp
->sb_blocksize
, false);
819 mp
->m_refc_mnr
[0] = mp
->m_refc_mxr
[0] / 2;
820 mp
->m_refc_mnr
[1] = mp
->m_refc_mxr
[1] / 2;
822 mp
->m_bsize
= XFS_FSB_TO_BB(mp
, 1);
823 mp
->m_ialloc_inos
= max_t(uint16_t, XFS_INODES_PER_CHUNK
,
825 mp
->m_ialloc_blks
= mp
->m_ialloc_inos
>> sbp
->sb_inopblog
;
827 if (sbp
->sb_spino_align
)
828 mp
->m_ialloc_min_blks
= sbp
->sb_spino_align
;
830 mp
->m_ialloc_min_blks
= mp
->m_ialloc_blks
;
831 mp
->m_alloc_set_aside
= xfs_alloc_set_aside(mp
);
832 mp
->m_ag_max_usable
= xfs_alloc_ag_max_usable(mp
);
836 * xfs_initialize_perag_data
838 * Read in each per-ag structure so we can count up the number of
839 * allocated inodes, free inodes and used filesystem blocks as this
840 * information is no longer persistent in the superblock. Once we have
841 * this information, write it into the in-core superblock structure.
844 xfs_initialize_perag_data(
845 struct xfs_mount
*mp
,
846 xfs_agnumber_t agcount
)
848 xfs_agnumber_t index
;
850 xfs_sb_t
*sbp
= &mp
->m_sb
;
854 uint64_t bfreelst
= 0;
859 for (index
= 0; index
< agcount
; index
++) {
861 * read the agf, then the agi. This gets us
862 * all the information we need and populates the
863 * per-ag structures for us.
865 error
= xfs_alloc_pagf_init(mp
, NULL
, index
, 0);
869 error
= xfs_ialloc_pagi_init(mp
, NULL
, index
);
872 pag
= xfs_perag_get(mp
, index
);
873 ifree
+= pag
->pagi_freecount
;
874 ialloc
+= pag
->pagi_count
;
875 bfree
+= pag
->pagf_freeblks
;
876 bfreelst
+= pag
->pagf_flcount
;
877 btree
+= pag
->pagf_btreeblks
;
880 fdblocks
= bfree
+ bfreelst
+ btree
;
883 * If the new summary counts are obviously incorrect, fail the
884 * mount operation because that implies the AGFs are also corrupt.
885 * Clear BAD_SUMMARY so that we don't unmount with a dirty log, which
886 * will prevent xfs_repair from fixing anything.
888 if (fdblocks
> sbp
->sb_dblocks
|| ifree
> ialloc
) {
889 xfs_alert(mp
, "AGF corruption. Please run xfs_repair.");
890 error
= -EFSCORRUPTED
;
894 /* Overwrite incore superblock counters with just-read data */
895 spin_lock(&mp
->m_sb_lock
);
896 sbp
->sb_ifree
= ifree
;
897 sbp
->sb_icount
= ialloc
;
898 sbp
->sb_fdblocks
= fdblocks
;
899 spin_unlock(&mp
->m_sb_lock
);
901 xfs_reinit_percpu_counters(mp
);
903 mp
->m_flags
&= ~XFS_MOUNT_BAD_SUMMARY
;
908 * xfs_log_sb() can be used to copy arbitrary changes to the in-core superblock
909 * into the superblock buffer to be logged. It does not provide the higher
910 * level of locking that is needed to protect the in-core superblock from
915 struct xfs_trans
*tp
)
917 struct xfs_mount
*mp
= tp
->t_mountp
;
918 struct xfs_buf
*bp
= xfs_trans_getsb(tp
, mp
, 0);
920 mp
->m_sb
.sb_icount
= percpu_counter_sum(&mp
->m_icount
);
921 mp
->m_sb
.sb_ifree
= percpu_counter_sum(&mp
->m_ifree
);
922 mp
->m_sb
.sb_fdblocks
= percpu_counter_sum(&mp
->m_fdblocks
);
924 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp
), &mp
->m_sb
);
925 xfs_trans_buf_set_type(tp
, bp
, XFS_BLFT_SB_BUF
);
926 xfs_trans_log_buf(tp
, bp
, 0, sizeof(struct xfs_dsb
));
932 * Sync the superblock to disk.
934 * Note that the caller is responsible for checking the frozen state of the
935 * filesystem. This procedure uses the non-blocking transaction allocator and
936 * thus will allow modifications to a frozen fs. This is required because this
937 * code can be called during the process of freezing where use of the high-level
938 * allocator would deadlock.
942 struct xfs_mount
*mp
,
945 struct xfs_trans
*tp
;
948 error
= xfs_trans_alloc(mp
, &M_RES(mp
)->tr_sb
, 0, 0,
949 XFS_TRANS_NO_WRITECOUNT
, &tp
);
955 xfs_trans_set_sync(tp
);
956 return xfs_trans_commit(tp
);
960 * Update all the secondary superblocks to match the new state of the primary.
961 * Because we are completely overwriting all the existing fields in the
962 * secondary superblock buffers, there is no need to read them in from disk.
963 * Just get a new buffer, stamp it and write it.
965 * The sb buffers need to be cached here so that we serialise against other
966 * operations that access the secondary superblocks, but we don't want to keep
967 * them in memory once they are written so we mark them as one-shot buffers.
970 xfs_update_secondary_sbs(
971 struct xfs_mount
*mp
)
976 LIST_HEAD (buffer_list
);
978 /* update secondary superblocks. */
979 for (agno
= 1; agno
< mp
->m_sb
.sb_agcount
; agno
++) {
982 bp
= xfs_buf_get(mp
->m_ddev_targp
,
983 XFS_AG_DADDR(mp
, agno
, XFS_SB_DADDR
),
984 XFS_FSS_TO_BB(mp
, 1), 0);
986 * If we get an error reading or writing alternate superblocks,
987 * continue. xfs_repair chooses the "best" superblock based
988 * on most matches; if we break early, we'll leave more
989 * superblocks un-updated than updated, and xfs_repair may
990 * pick them over the properly-updated primary.
994 "error allocating secondary superblock for ag %d",
997 saved_error
= -ENOMEM
;
1001 bp
->b_ops
= &xfs_sb_buf_ops
;
1002 xfs_buf_oneshot(bp
);
1003 xfs_buf_zero(bp
, 0, BBTOB(bp
->b_length
));
1004 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp
), &mp
->m_sb
);
1005 xfs_buf_delwri_queue(bp
, &buffer_list
);
1008 /* don't hold too many buffers at once */
1012 error
= xfs_buf_delwri_submit(&buffer_list
);
1015 "write error %d updating a secondary superblock near ag %d",
1018 saved_error
= error
;
1022 error
= xfs_buf_delwri_submit(&buffer_list
);
1025 "write error %d updating a secondary superblock near ag %d",
1029 return saved_error
? saved_error
: error
;
1033 * Same behavior as xfs_sync_sb, except that it is always synchronous and it
1034 * also writes the superblock buffer to disk sector 0 immediately.
1038 struct xfs_mount
*mp
)
1040 struct xfs_trans
*tp
;
1044 error
= xfs_trans_alloc(mp
, &M_RES(mp
)->tr_sb
, 0, 0, 0, &tp
);
1048 bp
= xfs_trans_getsb(tp
, mp
, 0);
1050 xfs_trans_bhold(tp
, bp
);
1051 xfs_trans_set_sync(tp
);
1052 error
= xfs_trans_commit(tp
);
1056 * write out the sb buffer to get the changes to disk
1058 error
= xfs_bwrite(bp
);
1067 struct xfs_fsop_geom
*geo
,
1070 memset(geo
, 0, sizeof(struct xfs_fsop_geom
));
1072 geo
->blocksize
= sbp
->sb_blocksize
;
1073 geo
->rtextsize
= sbp
->sb_rextsize
;
1074 geo
->agblocks
= sbp
->sb_agblocks
;
1075 geo
->agcount
= sbp
->sb_agcount
;
1076 geo
->logblocks
= sbp
->sb_logblocks
;
1077 geo
->sectsize
= sbp
->sb_sectsize
;
1078 geo
->inodesize
= sbp
->sb_inodesize
;
1079 geo
->imaxpct
= sbp
->sb_imax_pct
;
1080 geo
->datablocks
= sbp
->sb_dblocks
;
1081 geo
->rtblocks
= sbp
->sb_rblocks
;
1082 geo
->rtextents
= sbp
->sb_rextents
;
1083 geo
->logstart
= sbp
->sb_logstart
;
1084 BUILD_BUG_ON(sizeof(geo
->uuid
) != sizeof(sbp
->sb_uuid
));
1085 memcpy(geo
->uuid
, &sbp
->sb_uuid
, sizeof(sbp
->sb_uuid
));
1087 if (struct_version
< 2)
1090 geo
->sunit
= sbp
->sb_unit
;
1091 geo
->swidth
= sbp
->sb_width
;
1093 if (struct_version
< 3)
1096 geo
->version
= XFS_FSOP_GEOM_VERSION
;
1097 geo
->flags
= XFS_FSOP_GEOM_FLAGS_NLINK
|
1098 XFS_FSOP_GEOM_FLAGS_DIRV2
|
1099 XFS_FSOP_GEOM_FLAGS_EXTFLG
;
1100 if (xfs_sb_version_hasattr(sbp
))
1101 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_ATTR
;
1102 if (xfs_sb_version_hasquota(sbp
))
1103 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_QUOTA
;
1104 if (xfs_sb_version_hasalign(sbp
))
1105 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_IALIGN
;
1106 if (xfs_sb_version_hasdalign(sbp
))
1107 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_DALIGN
;
1108 if (xfs_sb_version_hassector(sbp
))
1109 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_SECTOR
;
1110 if (xfs_sb_version_hasasciici(sbp
))
1111 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_DIRV2CI
;
1112 if (xfs_sb_version_haslazysbcount(sbp
))
1113 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_LAZYSB
;
1114 if (xfs_sb_version_hasattr2(sbp
))
1115 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_ATTR2
;
1116 if (xfs_sb_version_hasprojid32bit(sbp
))
1117 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_PROJID32
;
1118 if (xfs_sb_version_hascrc(sbp
))
1119 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_V5SB
;
1120 if (xfs_sb_version_hasftype(sbp
))
1121 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_FTYPE
;
1122 if (xfs_sb_version_hasfinobt(sbp
))
1123 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_FINOBT
;
1124 if (xfs_sb_version_hassparseinodes(sbp
))
1125 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_SPINODES
;
1126 if (xfs_sb_version_hasrmapbt(sbp
))
1127 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_RMAPBT
;
1128 if (xfs_sb_version_hasreflink(sbp
))
1129 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_REFLINK
;
1130 if (xfs_sb_version_hassector(sbp
))
1131 geo
->logsectsize
= sbp
->sb_logsectsize
;
1133 geo
->logsectsize
= BBSIZE
;
1134 geo
->rtsectsize
= sbp
->sb_blocksize
;
1135 geo
->dirblocksize
= xfs_dir2_dirblock_bytes(sbp
);
1137 if (struct_version
< 4)
1140 if (xfs_sb_version_haslogv2(sbp
))
1141 geo
->flags
|= XFS_FSOP_GEOM_FLAGS_LOGV2
;
1143 geo
->logsunit
= sbp
->sb_logsunit
;
1148 /* Read a secondary superblock. */
1150 xfs_sb_read_secondary(
1151 struct xfs_mount
*mp
,
1152 struct xfs_trans
*tp
,
1153 xfs_agnumber_t agno
,
1154 struct xfs_buf
**bpp
)
1159 ASSERT(agno
!= 0 && agno
!= NULLAGNUMBER
);
1160 error
= xfs_trans_read_buf(mp
, tp
, mp
->m_ddev_targp
,
1161 XFS_AG_DADDR(mp
, agno
, XFS_SB_BLOCK(mp
)),
1162 XFS_FSS_TO_BB(mp
, 1), 0, &bp
, &xfs_sb_buf_ops
);
1165 xfs_buf_set_ref(bp
, XFS_SSB_REF
);
1170 /* Get an uninitialised secondary superblock buffer. */
1172 xfs_sb_get_secondary(
1173 struct xfs_mount
*mp
,
1174 struct xfs_trans
*tp
,
1175 xfs_agnumber_t agno
,
1176 struct xfs_buf
**bpp
)
1180 ASSERT(agno
!= 0 && agno
!= NULLAGNUMBER
);
1181 bp
= xfs_trans_get_buf(tp
, mp
->m_ddev_targp
,
1182 XFS_AG_DADDR(mp
, agno
, XFS_SB_BLOCK(mp
)),
1183 XFS_FSS_TO_BB(mp
, 1), 0);
1186 bp
->b_ops
= &xfs_sb_buf_ops
;
1187 xfs_buf_oneshot(bp
);