return xfs_btree_update(cur, &rec);
}
+/* Convert the ondisk btree record to its incore representation. */
+void
+xfs_alloc_btrec_to_irec(
+ const union xfs_btree_rec *rec,
+ struct xfs_alloc_rec_incore *irec)
+{
+ irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
+ irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
+}
+
+/* Simple checks for free space records. */
+xfs_failaddr_t
+xfs_alloc_check_irec(
+ struct xfs_btree_cur *cur,
+ const struct xfs_alloc_rec_incore *irec)
+{
+ struct xfs_perag *pag = cur->bc_ag.pag;
+
+ if (irec->ar_blockcount == 0)
+ return __this_address;
+
+ /* check for valid extent range, including overflow */
+ if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
+ return __this_address;
+
+ return NULL;
+}
+
+static inline int
+xfs_alloc_complain_bad_rec(
+ struct xfs_btree_cur *cur,
+ xfs_failaddr_t fa,
+ const struct xfs_alloc_rec_incore *irec)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+
+ xfs_warn(mp,
+ "%s Freespace BTree record corruption in AG %d detected at %pS!",
+ cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
+ cur->bc_ag.pag->pag_agno, fa);
+ xfs_warn(mp,
+ "start block 0x%x block count 0x%x", irec->ar_startblock,
+ irec->ar_blockcount);
+ return -EFSCORRUPTED;
+}
+
/*
* Get the data from the pointed-to record.
*/
xfs_extlen_t *len, /* output: length of extent */
int *stat) /* output: success/failure */
{
- struct xfs_mount *mp = cur->bc_mp;
- struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_alloc_rec_incore irec;
union xfs_btree_rec *rec;
+ xfs_failaddr_t fa;
int error;
error = xfs_btree_get_rec(cur, &rec, stat);
if (error || !(*stat))
return error;
- *bno = be32_to_cpu(rec->alloc.ar_startblock);
- *len = be32_to_cpu(rec->alloc.ar_blockcount);
-
- if (*len == 0)
- goto out_bad_rec;
-
- /* check for valid extent range, including overflow */
- if (!xfs_verify_agbext(pag, *bno, *len))
- goto out_bad_rec;
+ xfs_alloc_btrec_to_irec(rec, &irec);
+ fa = xfs_alloc_check_irec(cur, &irec);
+ if (fa)
+ return xfs_alloc_complain_bad_rec(cur, fa, &irec);
+ *bno = irec.ar_startblock;
+ *len = irec.ar_blockcount;
return 0;
-
-out_bad_rec:
- xfs_warn(mp,
- "%s Freespace BTree record corruption in AG %d detected!",
- cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
- pag->pag_agno);
- xfs_warn(mp,
- "start block 0x%x block count 0x%x", *bno, *len);
- return -EFSCORRUPTED;
}
/*
return 0;
}
+/*
+ * We do not verify the AGFL contents against AGF-based index counters here,
+ * even though we may have access to the perag that contains shadow copies. We
+ * don't know if the AGF based counters have been checked, and if they have they
+ * still may be inconsistent because they haven't yet been reset on the first
+ * allocation after the AGF has been read in.
+ *
+ * This means we can only check that all agfl entries contain valid or null
+ * values because we can't reliably determine the active range to exclude
+ * NULLAGBNO as a valid value.
+ *
+ * However, we can't even do that for v4 format filesystems because there are
+ * old versions of mkfs out there that does not initialise the AGFL to known,
+ * verifiable values. HEnce we can't tell the difference between a AGFL block
+ * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
+ *
+ * As a result, we can only fully validate AGFL block numbers when we pull them
+ * from the freelist in xfs_alloc_get_freelist().
+ */
static xfs_failaddr_t
xfs_agfl_verify(
struct xfs_buf *bp)
__be32 *agfl_bno = xfs_buf_to_agfl_bno(bp);
int i;
- /*
- * There is no verification of non-crc AGFLs because mkfs does not
- * initialise the AGFL to zero or NULL. Hence the only valid part of the
- * AGFL is what the AGF says is active. We can't get to the AGF, so we
- * can't verify just those entries are valid.
- */
if (!xfs_has_crc(mp))
return NULL;
}
/*
- * Check the agfl fields of the agf for inconsistency or corruption. The purpose
- * is to detect an agfl header padding mismatch between current and early v5
- * kernels. This problem manifests as a 1-slot size difference between the
- * on-disk flcount and the active [first, last] range of a wrapped agfl. This
- * may also catch variants of agfl count corruption unrelated to padding. Either
- * way, we'll reset the agfl and warn the user.
+ * Check the agfl fields of the agf for inconsistency or corruption.
+ *
+ * The original purpose was to detect an agfl header padding mismatch between
+ * current and early v5 kernels. This problem manifests as a 1-slot size
+ * difference between the on-disk flcount and the active [first, last] range of
+ * a wrapped agfl.
+ *
+ * However, we need to use these same checks to catch agfl count corruptions
+ * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
+ * way, we need to reset the agfl and warn the user.
*
* Return true if a reset is required before the agfl can be used, false
* otherwise.
int agfl_size = xfs_agfl_size(mp);
int active;
- /* no agfl header on v4 supers */
- if (!xfs_has_crc(mp))
- return false;
-
/*
* The agf read verifier catches severe corruption of these fields.
* Repeat some sanity checks to cover a packed -> unpacked mismatch if
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
+ xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
}
bool skip_discard)
{
struct xfs_extent_free_item *xefi;
-#ifdef DEBUG
struct xfs_mount *mp = tp->t_mountp;
+#ifdef DEBUG
xfs_agnumber_t agno;
xfs_agblock_t agbno;
} else {
xefi->xefi_owner = XFS_RMAP_OWN_NULL;
}
- trace_xfs_bmap_free_defer(tp->t_mountp,
+ trace_xfs_bmap_free_defer(mp,
XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
+
+ xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
}
*/
agfl_bno = xfs_buf_to_agfl_bno(agflbp);
bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
+ if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
+ return -EFSCORRUPTED;
+
be32_add_cpu(&agf->agf_flfirst, 1);
xfs_trans_brelse(tp, agflbp);
if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
return 0;
}
+/*
+ * Verify the AGF is consistent.
+ *
+ * We do not verify the AGFL indexes in the AGF are fully consistent here
+ * because of issues with variable on-disk structure sizes. Instead, we check
+ * the agfl indexes for consistency when we initialise the perag from the AGF
+ * information after a read completes.
+ *
+ * If the index is inconsistent, then we mark the perag as needing an AGFL
+ * reset. The first AGFL update performed then resets the AGFL indexes and
+ * refills the AGFL with known good free blocks, allowing the filesystem to
+ * continue operating normally at the cost of a few leaked free space blocks.
+ */
static xfs_failaddr_t
xfs_agf_verify(
struct xfs_buf *bp)
return __this_address;
return NULL;
-
}
static void
pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
if (xfs_agfl_needs_reset(pag->pag_mount, agf))
set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
+ else
+ clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
/*
* Update the in-core allocbt counter. Filter out the rmapbt
static int
xfs_alloc_vextent_check_args(
struct xfs_alloc_arg *args,
- xfs_fsblock_t target)
+ xfs_fsblock_t target,
+ xfs_agnumber_t *minimum_agno)
{
struct xfs_mount *mp = args->mp;
xfs_agblock_t agsize;
- args->agbno = NULLAGBLOCK;
args->fsbno = NULLFSBLOCK;
+ *minimum_agno = 0;
+ if (args->tp->t_highest_agno != NULLAGNUMBER)
+ *minimum_agno = args->tp->t_highest_agno;
+
/*
* Just fix this up, for the case where the last a.g. is shorter
* (or there's only one a.g.) and the caller couldn't easily figure
XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
args->minlen > args->maxlen || args->minlen > agsize ||
args->mod >= args->prod) {
- args->fsbno = NULLFSBLOCK;
trace_xfs_alloc_vextent_badargs(args);
return -ENOSPC;
}
+
+ if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
+ trace_xfs_alloc_vextent_skip_deadlock(args);
+ return -ENOSPC;
+ }
return 0;
+
}
/*
*/
static int
xfs_alloc_vextent_prepare_ag(
- struct xfs_alloc_arg *args)
+ struct xfs_alloc_arg *args,
+ uint32_t flags)
{
bool need_pag = !args->pag;
int error;
if (need_pag)
args->pag = xfs_perag_get(args->mp, args->agno);
- error = xfs_alloc_fix_freelist(args, 0);
+ args->agbp = NULL;
+ error = xfs_alloc_fix_freelist(args, flags);
if (error) {
trace_xfs_alloc_vextent_nofix(args);
if (need_pag)
XFS_STATS_INC(mp, xs_allocx);
XFS_STATS_ADD(mp, xs_allocb, args->len);
+ trace_xfs_alloc_vextent_finish(args);
+
out_drop_perag:
- if (drop_perag) {
- xfs_perag_put(args->pag);
+ if (drop_perag && args->pag) {
+ xfs_perag_rele(args->pag);
args->pag = NULL;
}
return error;
xfs_agnumber_t agno)
{
struct xfs_mount *mp = args->mp;
- xfs_agnumber_t minimum_agno = 0;
+ xfs_agnumber_t minimum_agno;
int error;
- if (args->tp->t_highest_agno != NULLAGNUMBER)
- minimum_agno = args->tp->t_highest_agno;
+ ASSERT(args->pag != NULL);
+ ASSERT(args->pag->pag_agno == agno);
- if (minimum_agno > agno) {
- trace_xfs_alloc_vextent_skip_deadlock(args);
- args->fsbno = NULLFSBLOCK;
- return 0;
- }
+ args->agno = agno;
+ args->agbno = 0;
+
+ trace_xfs_alloc_vextent_this_ag(args);
- error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0));
+ error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
+ &minimum_agno);
if (error) {
if (error == -ENOSPC)
return 0;
return error;
}
- args->agno = agno;
- args->agbno = 0;
-
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_size(args);
* we attempt to allocation in as there is no locality optimisation possible for
* those allocations.
*
+ * On return, args->pag may be left referenced if we finish before the "all
+ * failed" return point. The allocation finish still needs the perag, and
+ * so the caller will release it once they've finished the allocation.
+ *
* When we wrap the AG iteration at the end of the filesystem, we have to be
* careful not to wrap into AGs below ones we already have locked in the
* transaction if we are doing a blocking iteration. This will result in an
uint32_t flags)
{
struct xfs_mount *mp = args->mp;
+ xfs_agnumber_t restart_agno = minimum_agno;
+ xfs_agnumber_t agno;
int error = 0;
- ASSERT(start_agno >= minimum_agno);
-
- /*
- * Loop over allocation groups twice; first time with
- * trylock set, second time without.
- */
- args->agno = start_agno;
- for (;;) {
- args->pag = xfs_perag_get(mp, args->agno);
- error = xfs_alloc_vextent_prepare_ag(args);
+ if (flags & XFS_ALLOC_FLAG_TRYLOCK)
+ restart_agno = 0;
+restart:
+ for_each_perag_wrap_range(mp, start_agno, restart_agno,
+ mp->m_sb.sb_agcount, agno, args->pag) {
+ args->agno = agno;
+ error = xfs_alloc_vextent_prepare_ag(args, flags);
if (error)
break;
-
- if (args->agbp) {
- /*
- * Allocation is supposed to succeed now, so break out
- * of the loop regardless of whether we succeed or not.
- */
- if (args->agno == start_agno && target_agbno) {
- args->agbno = target_agbno;
- error = xfs_alloc_ag_vextent_near(args);
- } else {
- args->agbno = 0;
- error = xfs_alloc_ag_vextent_size(args);
- }
- break;
- }
-
- trace_xfs_alloc_vextent_loopfailed(args);
-
- /*
- * If we are try-locking, we can't deadlock on AGF locks so we
- * can wrap all the way back to the first AG. Otherwise, wrap
- * back to the start AG so we can't deadlock and let the end of
- * scan handler decide what to do next.
- */
- if (++(args->agno) == mp->m_sb.sb_agcount) {
- if (flags & XFS_ALLOC_FLAG_TRYLOCK)
- args->agno = 0;
- else
- args->agno = minimum_agno;
+ if (!args->agbp) {
+ trace_xfs_alloc_vextent_loopfailed(args);
+ continue;
}
/*
- * Reached the starting a.g., must either be done
- * or switch to non-trylock mode.
+ * Allocation is supposed to succeed now, so break out of the
+ * loop regardless of whether we succeed or not.
*/
- if (args->agno == start_agno) {
- if (flags == 0) {
- args->agbno = NULLAGBLOCK;
- trace_xfs_alloc_vextent_allfailed(args);
- break;
- }
+ if (args->agno == start_agno && target_agbno) {
args->agbno = target_agbno;
- flags = 0;
+ error = xfs_alloc_ag_vextent_near(args);
+ } else {
+ args->agbno = 0;
+ error = xfs_alloc_ag_vextent_size(args);
}
- xfs_perag_put(args->pag);
+ break;
+ }
+ if (error) {
+ xfs_perag_rele(args->pag);
args->pag = NULL;
+ return error;
}
+ if (args->agbp)
+ return 0;
+
/*
- * The perag is left referenced in args for the caller to clean
- * up after they've finished the allocation.
+ * We didn't find an AG we can alloation from. If we were given
+ * constraining flags by the caller, drop them and retry the allocation
+ * without any constraints being set.
*/
- return error;
+ if (flags) {
+ flags = 0;
+ restart_agno = minimum_agno;
+ goto restart;
+ }
+
+ ASSERT(args->pag == NULL);
+ trace_xfs_alloc_vextent_allfailed(args);
+ return 0;
}
/*
xfs_fsblock_t target)
{
struct xfs_mount *mp = args->mp;
- xfs_agnumber_t minimum_agno = 0;
+ xfs_agnumber_t minimum_agno;
xfs_agnumber_t start_agno;
xfs_agnumber_t rotorstep = xfs_rotorstep;
bool bump_rotor = false;
int error;
- if (args->tp->t_highest_agno != NULLAGNUMBER)
- minimum_agno = args->tp->t_highest_agno;
+ ASSERT(args->pag == NULL);
- error = xfs_alloc_vextent_check_args(args, target);
+ args->agno = NULLAGNUMBER;
+ args->agbno = NULLAGBLOCK;
+
+ trace_xfs_alloc_vextent_start_ag(args);
+
+ error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
if (error) {
if (error == -ENOSPC)
return 0;
xfs_fsblock_t target)
{
struct xfs_mount *mp = args->mp;
- xfs_agnumber_t minimum_agno = 0;
+ xfs_agnumber_t minimum_agno;
xfs_agnumber_t start_agno;
int error;
- if (args->tp->t_highest_agno != NULLAGNUMBER)
- minimum_agno = args->tp->t_highest_agno;
+ ASSERT(args->pag == NULL);
+
+ args->agno = NULLAGNUMBER;
+ args->agbno = NULLAGBLOCK;
+
+ trace_xfs_alloc_vextent_first_ag(args);
- error = xfs_alloc_vextent_check_args(args, target);
+ error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
if (error) {
if (error == -ENOSPC)
return 0;
xfs_fsblock_t target)
{
struct xfs_mount *mp = args->mp;
- xfs_agnumber_t minimum_agno = 0;
+ xfs_agnumber_t minimum_agno;
int error;
- if (args->tp->t_highest_agno != NULLAGNUMBER)
- minimum_agno = args->tp->t_highest_agno;
+ ASSERT(args->pag != NULL);
+ ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
- error = xfs_alloc_vextent_check_args(args, target);
+ args->agno = XFS_FSB_TO_AGNO(mp, target);
+ args->agbno = XFS_FSB_TO_AGBNO(mp, target);
+
+ trace_xfs_alloc_vextent_exact_bno(args);
+
+ error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
if (error) {
if (error == -ENOSPC)
return 0;
return error;
}
- args->agno = XFS_FSB_TO_AGNO(mp, target);
- if (minimum_agno > args->agno) {
- trace_xfs_alloc_vextent_skip_deadlock(args);
- args->fsbno = NULLFSBLOCK;
- return 0;
- }
-
- args->agbno = XFS_FSB_TO_AGBNO(mp, target);
-
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_exact(args);
xfs_fsblock_t target)
{
struct xfs_mount *mp = args->mp;
- xfs_agnumber_t minimum_agno = 0;
+ xfs_agnumber_t minimum_agno;
bool needs_perag = args->pag == NULL;
int error;
- if (args->tp->t_highest_agno != NULLAGNUMBER)
- minimum_agno = args->tp->t_highest_agno;
+ if (!needs_perag)
+ ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
+
+ args->agno = XFS_FSB_TO_AGNO(mp, target);
+ args->agbno = XFS_FSB_TO_AGBNO(mp, target);
- error = xfs_alloc_vextent_check_args(args, target);
+ trace_xfs_alloc_vextent_near_bno(args);
+
+ error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
if (error) {
if (error == -ENOSPC)
return 0;
return error;
}
- args->agno = XFS_FSB_TO_AGNO(mp, target);
- if (minimum_agno > args->agno) {
- trace_xfs_alloc_vextent_skip_deadlock(args);
- args->fsbno = NULLFSBLOCK;
- return 0;
- }
-
if (needs_perag)
- args->pag = xfs_perag_get(mp, args->agno);
+ args->pag = xfs_perag_grab(mp, args->agno);
- args->agbno = XFS_FSB_TO_AGBNO(mp, target);
-
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_near(args);
int
__xfs_free_extent(
struct xfs_trans *tp,
- xfs_fsblock_t bno,
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
xfs_extlen_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_buf *agbp;
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
struct xfs_agf *agf;
int error;
unsigned int busy_flags = 0;
- struct xfs_perag *pag;
ASSERT(len != 0);
ASSERT(type != XFS_AG_RESV_AGFL);
XFS_ERRTAG_FREE_EXTENT))
return -EIO;
- pag = xfs_perag_get(mp, agno);
error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
if (error)
- goto err;
+ return error;
agf = agbp->b_addr;
if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
goto err_release;
}
- error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
+ error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
+ type);
if (error)
goto err_release;
if (skip_discard)
busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
- xfs_perag_put(pag);
return 0;
err_release:
xfs_trans_brelse(tp, agbp);
-err:
- xfs_perag_put(pag);
return error;
}
{
struct xfs_alloc_query_range_info *query = priv;
struct xfs_alloc_rec_incore irec;
+ xfs_failaddr_t fa;
+
+ xfs_alloc_btrec_to_irec(rec, &irec);
+ fa = xfs_alloc_check_irec(cur, &irec);
+ if (fa)
+ return xfs_alloc_complain_bad_rec(cur, fa, &irec);
- irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
- irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
return query->fn(cur, &irec, query->priv);
}
return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
}
-/* Is there a record covering a given extent? */
+/*
+ * Scan part of the keyspace of the free space and tell us if the area has no
+ * records, is fully mapped by records, or is partially filled.
+ */
int
-xfs_alloc_has_record(
+xfs_alloc_has_records(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
xfs_extlen_t len,
- bool *exists)
+ enum xbtree_recpacking *outcome)
{
union xfs_btree_irec low;
union xfs_btree_irec high;
memset(&high, 0xFF, sizeof(high));
high.a.ar_startblock = bno + len - 1;
- return xfs_btree_has_record(cur, &low, &high, exists);
+ return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
}
/*