#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_errortag.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
+#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_bmap.h"
-extern kmem_zone_t *xfs_bmap_free_item_zone;
+struct kmem_cache *xfs_extfree_item_cache;
struct workqueue_struct *xfs_alloc_wq;
#define XFSA_FIXUP_BNO_OK 1
#define XFSA_FIXUP_CNT_OK 2
-STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
-STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
-STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
-
/*
* Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
* the beginning of the block for a proper header with the location information
{
unsigned int size = mp->m_sb.sb_sectsize;
- if (xfs_sb_version_hascrc(&mp->m_sb))
+ if (xfs_has_crc(mp))
size -= sizeof(struct xfs_agfl);
return size / sizeof(xfs_agblock_t);
xfs_refc_block(
struct xfs_mount *mp)
{
- if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ if (xfs_has_rmapbt(mp))
return XFS_RMAP_BLOCK(mp) + 1;
- if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ if (xfs_has_finobt(mp))
return XFS_FIBT_BLOCK(mp) + 1;
return XFS_IBT_BLOCK(mp) + 1;
}
xfs_prealloc_blocks(
struct xfs_mount *mp)
{
- if (xfs_sb_version_hasreflink(&mp->m_sb))
+ if (xfs_has_reflink(mp))
return xfs_refc_block(mp) + 1;
- if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ if (xfs_has_rmapbt(mp))
return XFS_RMAP_BLOCK(mp) + 1;
- if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ if (xfs_has_finobt(mp))
return XFS_FIBT_BLOCK(mp) + 1;
return XFS_IBT_BLOCK(mp) + 1;
}
/*
+ * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
+ * guarantee that we can refill the AGFL prior to allocating space in a nearly
+ * full AG. Although the space described by the free space btrees, the
+ * blocks used by the freesp btrees themselves, and the blocks owned by the
+ * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
+ * free space in the AG drop so low that the free space btrees cannot refill an
+ * empty AGFL up to the minimum level. Rather than grind through empty AGs
+ * until the fs goes down, we subtract this many AG blocks from the incore
+ * fdblocks to ensure user allocation does not overcommit the space the
+ * filesystem needs for the AGFLs. The rmap btree uses a per-AG reservation to
+ * withhold space from xfs_mod_fdblocks, so we do not account for that here.
+ */
+#define XFS_ALLOCBT_AGFL_RESERVE 4
+
+/*
+ * Compute the number of blocks that we set aside to guarantee the ability to
+ * refill the AGFL and handle a full bmap btree split.
+ *
* In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
* AGF buffer (PV 947395), we place constraints on the relationship among
* actual allocations for data blocks, freelist blocks, and potential file data
* extents need to be actually allocated. To get around this, we explicitly set
* aside a few blocks which will not be reserved in delayed allocation.
*
- * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
- * potential split of the file's bmap btree.
+ * For each AG, we need to reserve enough blocks to replenish a totally empty
+ * AGFL and 4 more to handle a potential split of the file's bmap btree.
*/
unsigned int
xfs_alloc_set_aside(
struct xfs_mount *mp)
{
- return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
+ return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
}
/*
unsigned int blocks;
blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
- blocks += XFS_ALLOC_AGFL_RESERVE;
+ blocks += XFS_ALLOCBT_AGFL_RESERVE;
blocks += 3; /* AGF, AGI btree root blocks */
- if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ if (xfs_has_finobt(mp))
blocks++; /* finobt root block */
- if (xfs_sb_version_hasrmapbt(&mp->m_sb))
- blocks++; /* rmap root block */
- if (xfs_sb_version_hasreflink(&mp->m_sb))
+ if (xfs_has_rmapbt(mp))
+ blocks++; /* rmap root block */
+ if (xfs_has_reflink(mp))
blocks++; /* refcount root block */
return mp->m_sb.sb_agblocks - blocks;
cur->bc_rec.a.ar_startblock = bno;
cur->bc_rec.a.ar_blockcount = len;
error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
- cur->bc_private.a.priv.abt.active = (*stat == 1);
+ cur->bc_ag.abt.active = (*stat == 1);
return error;
}
cur->bc_rec.a.ar_startblock = bno;
cur->bc_rec.a.ar_blockcount = len;
error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
- cur->bc_private.a.priv.abt.active = (*stat == 1);
+ cur->bc_ag.abt.active = (*stat == 1);
return error;
}
cur->bc_rec.a.ar_startblock = bno;
cur->bc_rec.a.ar_blockcount = len;
error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
- cur->bc_private.a.priv.abt.active = (*stat == 1);
+ cur->bc_ag.abt.active = (*stat == 1);
return error;
}
xfs_alloc_cur_active(
struct xfs_btree_cur *cur)
{
- return cur && cur->bc_private.a.priv.abt.active;
+ return cur && cur->bc_ag.abt.active;
}
/*
return xfs_btree_update(cur, &rec);
}
+/* Convert the ondisk btree record to its incore representation. */
+void
+xfs_alloc_btrec_to_irec(
+ const union xfs_btree_rec *rec,
+ struct xfs_alloc_rec_incore *irec)
+{
+ irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
+ irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
+}
+
+/* Simple checks for free space records. */
+xfs_failaddr_t
+xfs_alloc_check_irec(
+ struct xfs_btree_cur *cur,
+ const struct xfs_alloc_rec_incore *irec)
+{
+ struct xfs_perag *pag = cur->bc_ag.pag;
+
+ if (irec->ar_blockcount == 0)
+ return __this_address;
+
+ /* check for valid extent range, including overflow */
+ if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
+ return __this_address;
+
+ return NULL;
+}
+
+static inline int
+xfs_alloc_complain_bad_rec(
+ struct xfs_btree_cur *cur,
+ xfs_failaddr_t fa,
+ const struct xfs_alloc_rec_incore *irec)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+
+ xfs_warn(mp,
+ "%s Freespace BTree record corruption in AG %d detected at %pS!",
+ cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
+ cur->bc_ag.pag->pag_agno, fa);
+ xfs_warn(mp,
+ "start block 0x%x block count 0x%x", irec->ar_startblock,
+ irec->ar_blockcount);
+ return -EFSCORRUPTED;
+}
+
/*
* Get the data from the pointed-to record.
*/
xfs_extlen_t *len, /* output: length of extent */
int *stat) /* output: success/failure */
{
- struct xfs_mount *mp = cur->bc_mp;
- xfs_agnumber_t agno = cur->bc_private.a.agno;
+ struct xfs_alloc_rec_incore irec;
union xfs_btree_rec *rec;
+ xfs_failaddr_t fa;
int error;
error = xfs_btree_get_rec(cur, &rec, stat);
if (error || !(*stat))
return error;
- *bno = be32_to_cpu(rec->alloc.ar_startblock);
- *len = be32_to_cpu(rec->alloc.ar_blockcount);
-
- if (*len == 0)
- goto out_bad_rec;
-
- /* check for valid extent range, including overflow */
- if (!xfs_verify_agbno(mp, agno, *bno))
- goto out_bad_rec;
- if (*bno > *bno + *len)
- goto out_bad_rec;
- if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
- goto out_bad_rec;
+ xfs_alloc_btrec_to_irec(rec, &irec);
+ fa = xfs_alloc_check_irec(cur, &irec);
+ if (fa)
+ return xfs_alloc_complain_bad_rec(cur, fa, &irec);
+ *bno = irec.ar_startblock;
+ *len = irec.ar_blockcount;
return 0;
-
-out_bad_rec:
- xfs_warn(mp,
- "%s Freespace BTree record corruption in AG %d detected!",
- cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
- xfs_warn(mp,
- "start block 0x%x block count 0x%x", *bno, *len);
- return -EFSCORRUPTED;
}
/*
*/
STATIC int /* error code */
xfs_alloc_fixup_trees(
- xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
- xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
+ struct xfs_btree_cur *cnt_cur, /* cursor for by-size btree */
+ struct xfs_btree_cur *bno_cur, /* cursor for by-block btree */
xfs_agblock_t fbno, /* starting block of free extent */
xfs_extlen_t flen, /* length of free extent */
xfs_agblock_t rbno, /* starting block of returned extent */
struct xfs_btree_block *bnoblock;
struct xfs_btree_block *cntblock;
- bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
- cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
+ bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
+ cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);
if (XFS_IS_CORRUPT(mp,
bnoblock->bb_numrecs !=
return 0;
}
+/*
+ * We do not verify the AGFL contents against AGF-based index counters here,
+ * even though we may have access to the perag that contains shadow copies. We
+ * don't know if the AGF based counters have been checked, and if they have they
+ * still may be inconsistent because they haven't yet been reset on the first
+ * allocation after the AGF has been read in.
+ *
+ * This means we can only check that all agfl entries contain valid or null
+ * values because we can't reliably determine the active range to exclude
+ * NULLAGBNO as a valid value.
+ *
+ * However, we can't even do that for v4 format filesystems because there are
+ * old versions of mkfs out there that does not initialise the AGFL to known,
+ * verifiable values. HEnce we can't tell the difference between a AGFL block
+ * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
+ *
+ * As a result, we can only fully validate AGFL block numbers when we pull them
+ * from the freelist in xfs_alloc_get_freelist().
+ */
static xfs_failaddr_t
xfs_agfl_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_mount;
struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
+ __be32 *agfl_bno = xfs_buf_to_agfl_bno(bp);
int i;
- /*
- * There is no verification of non-crc AGFLs because mkfs does not
- * initialise the AGFL to zero or NULL. Hence the only valid part of the
- * AGFL is what the AGF says is active. We can't get to the AGF, so we
- * can't verify just those entries are valid.
- */
- if (!xfs_sb_version_hascrc(&mp->m_sb))
+ if (!xfs_has_crc(mp))
return NULL;
if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
return __this_address;
for (i = 0; i < xfs_agfl_size(mp); i++) {
- if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
- be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
+ if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
+ be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
return __this_address;
}
* AGFL is what the AGF says is active. We can't get to the AGF, so we
* can't verify just those entries are valid.
*/
- if (!xfs_sb_version_hascrc(&mp->m_sb))
+ if (!xfs_has_crc(mp))
return;
if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
xfs_failaddr_t fa;
/* no verification of non-crc AGFLs */
- if (!xfs_sb_version_hascrc(&mp->m_sb))
+ if (!xfs_has_crc(mp))
return;
fa = xfs_agfl_verify(bp);
/*
* Read in the allocation group free block array.
*/
-int /* error */
+int
xfs_alloc_read_agfl(
- xfs_mount_t *mp, /* mount point structure */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_agnumber_t agno, /* allocation group number */
- xfs_buf_t **bpp) /* buffer for the ag free block array */
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ struct xfs_buf **bpp)
{
- xfs_buf_t *bp; /* return value */
- int error;
+ struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_buf *bp;
+ int error;
- ASSERT(agno != NULLAGNUMBER);
error = xfs_trans_read_buf(
mp, tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
+ XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
if (error)
return error;
STATIC int
xfs_alloc_update_counters(
struct xfs_trans *tp,
- struct xfs_perag *pag,
struct xfs_buf *agbp,
long len)
{
- struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ struct xfs_agf *agf = agbp->b_addr;
- pag->pagf_freeblks += len;
+ agbp->b_pag->pagf_freeblks += len;
be32_add_cpu(&agf->agf_freeblks, len);
- xfs_trans_agblocks_delta(tp, len);
if (unlikely(be32_to_cpu(agf->agf_freeblks) >
be32_to_cpu(agf->agf_length))) {
- xfs_buf_corruption_error(agbp);
+ xfs_buf_mark_corrupt(agbp);
return -EFSCORRUPTED;
}
int error;
int i;
- ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);
-
acur->cur_len = args->maxlen;
acur->rec_bno = 0;
acur->rec_len = 0;
*/
if (!acur->cnt)
acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
- args->agbp, args->agno, XFS_BTNUM_CNT);
+ args->agbp, args->pag, XFS_BTNUM_CNT);
error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
if (error)
return error;
*/
if (!acur->bnolt)
acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
- args->agbp, args->agno, XFS_BTNUM_BNO);
+ args->agbp, args->pag, XFS_BTNUM_BNO);
if (!acur->bnogt)
acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
- args->agbp, args->agno, XFS_BTNUM_BNO);
+ args->agbp, args->pag, XFS_BTNUM_BNO);
return i == 1 ? 0 : -ENOSPC;
}
* We have an aligned record that satisfies minlen and beats or matches
* the candidate extent size. Compare locality for near allocation mode.
*/
- ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
diff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, args->datatype,
bnoa, lena, &bnew);
deactivate = true;
out:
if (deactivate)
- cur->bc_private.a.priv.abt.active = false;
+ cur->bc_ag.abt.active = false;
trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
*new);
return 0;
struct xfs_alloc_arg *args,
struct xfs_alloc_cur *acur)
{
+ struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
int error;
ASSERT(acur->cnt && acur->bnolt);
ASSERT(acur->bno >= acur->rec_bno);
ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
- ASSERT(acur->rec_bno + acur->rec_len <=
- be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+ ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
acur->rec_len, acur->bno, acur->len, 0);
xfs_extlen_t *flenp, /* result length */
int *stat) /* status: 0-freelist, 1-normal/none */
{
+ struct xfs_agf *agf = args->agbp->b_addr;
int error = 0;
xfs_agblock_t fbno = NULLAGBLOCK;
xfs_extlen_t flen = 0;
if (args->minlen != 1 || args->alignment != 1 ||
args->resv == XFS_AG_RESV_AGFL ||
- (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <=
- args->minleft))
+ be32_to_cpu(agf->agf_flcount) <= args->minleft)
goto out;
- error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
+ error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
+ &fbno, 0);
if (error)
goto error;
if (fbno == NULLAGBLOCK)
goto out;
- xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
+ xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
(args->datatype & XFS_ALLOC_NOBUSY));
if (args->datatype & XFS_ALLOC_USERDATA) {
}
*fbnop = args->agbno = fbno;
*flenp = args->len = 1;
- if (XFS_IS_CORRUPT(args->mp,
- fbno >= be32_to_cpu(
- XFS_BUF_TO_AGF(args->agbp)->agf_length))) {
+ if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
error = -EFSCORRUPTED;
goto error;
}
* If we're feeding an AGFL block to something that doesn't live in the
* free space, we need to clear out the OWN_AG rmap.
*/
- error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
+ error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
&XFS_RMAP_OINFO_AG);
if (error)
goto error;
return error;
}
-/*
- * Allocate a variable extent in the allocation group agno.
- * Type and bno are used to determine where in the allocation group the
- * extent will start.
- * Extent's length (returned in *len) will be between minlen and maxlen,
- * and of the form k * prod + mod unless there's nothing that large.
- * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
- */
-STATIC int /* error */
-xfs_alloc_ag_vextent(
- xfs_alloc_arg_t *args) /* argument structure for allocation */
-{
- int error=0;
-
- ASSERT(args->minlen > 0);
- ASSERT(args->maxlen > 0);
- ASSERT(args->minlen <= args->maxlen);
- ASSERT(args->mod < args->prod);
- ASSERT(args->alignment > 0);
-
- /*
- * Branch to correct routine based on the type.
- */
- args->wasfromfl = 0;
- switch (args->type) {
- case XFS_ALLOCTYPE_THIS_AG:
- error = xfs_alloc_ag_vextent_size(args);
- break;
- case XFS_ALLOCTYPE_NEAR_BNO:
- error = xfs_alloc_ag_vextent_near(args);
- break;
- case XFS_ALLOCTYPE_THIS_BNO:
- error = xfs_alloc_ag_vextent_exact(args);
- break;
- default:
- ASSERT(0);
- /* NOTREACHED */
- }
-
- if (error || args->agbno == NULLAGBLOCK)
- return error;
-
- ASSERT(args->len >= args->minlen);
- ASSERT(args->len <= args->maxlen);
- ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
- ASSERT(args->agbno % args->alignment == 0);
-
- /* if not file data, insert new block into the reverse map btree */
- if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
- error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
- args->agbno, args->len, &args->oinfo);
- if (error)
- return error;
- }
-
- if (!args->wasfromfl) {
- error = xfs_alloc_update_counters(args->tp, args->pag,
- args->agbp,
- -((long)(args->len)));
- if (error)
- return error;
-
- ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
- args->agbno, args->len));
- }
-
- xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
-
- XFS_STATS_INC(args->mp, xs_allocx);
- XFS_STATS_ADD(args->mp, xs_allocb, args->len);
- return error;
-}
-
/*
* Allocate a variable extent at exactly agno/bno.
* Extent's length (returned in *len) will be between minlen and maxlen,
xfs_alloc_ag_vextent_exact(
xfs_alloc_arg_t *args) /* allocation argument structure */
{
- xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
- xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
+ struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
+ struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
+ struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
int error;
xfs_agblock_t fbno; /* start block of found extent */
xfs_extlen_t flen; /* length of found extent */
* Allocate/initialize a cursor for the by-number freespace btree.
*/
bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO);
+ args->pag, XFS_BTNUM_BNO);
/*
* Lookup bno and minlen in the btree (minlen is irrelevant, really).
* Allocate/initialize a cursor for the by-size btree.
*/
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT);
- ASSERT(args->agbno + args->len <=
- be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+ args->pag, XFS_BTNUM_CNT);
+ ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
args->len, XFSA_FIXUP_BNO_OK);
if (error) {
if (error)
return error;
if (i == 0)
- cur->bc_private.a.priv.abt.active = false;
+ cur->bc_ag.abt.active = false;
if (count > 0)
count--;
bool fbinc;
ASSERT(acur->len == 0);
- ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
*stat = 0;
if (error)
return error;
if (i) {
- acur->cnt->bc_private.a.priv.abt.active = true;
+ acur->cnt->bc_ag.abt.active = true;
fbcur = acur->cnt;
fbinc = false;
}
#ifdef DEBUG
/* Randomly don't execute the first algorithm. */
- if (prandom_u32() & 1)
+ if (get_random_u32_below(2))
return 0;
#endif
* maxlen, go to the start of this block, and skip all those smaller
* than minlen.
*/
- if (len || args->alignment > 1) {
- acur->cnt->bc_ptrs[0] = 1;
+ if (*len || args->alignment > 1) {
+ acur->cnt->bc_levels[0].ptr = 1;
do {
error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
if (error)
xfs_alloc_ag_vextent_size(
xfs_alloc_arg_t *args) /* allocation argument structure */
{
- xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
- xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
+ struct xfs_agf *agf = args->agbp->b_addr;
+ struct xfs_btree_cur *bno_cur; /* cursor for bno btree */
+ struct xfs_btree_cur *cnt_cur; /* cursor for cnt btree */
int error; /* error result */
xfs_agblock_t fbno; /* start of found freespace */
xfs_extlen_t flen; /* length of found freespace */
* Allocate and initialize a cursor for the by-size btree.
*/
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT);
+ args->pag, XFS_BTNUM_CNT);
bno_cur = NULL;
- busy = false;
/*
* Look for an entry >= maxlen+alignment-1 blocks.
* Allocate and initialize a cursor for the by-block tree.
*/
bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO);
+ args->pag, XFS_BTNUM_BNO);
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
rbno, rlen, XFSA_FIXUP_CNT_OK)))
goto error0;
args->agbno = rbno;
if (XFS_IS_CORRUPT(args->mp,
args->agbno + args->len >
- be32_to_cpu(
- XFS_BUF_TO_AGF(args->agbp)->agf_length))) {
+ be32_to_cpu(agf->agf_length))) {
error = -EFSCORRUPTED;
goto error0;
}
enum xfs_ag_resv_type type)
{
struct xfs_mount *mp;
- struct xfs_perag *pag;
struct xfs_btree_cur *bno_cur;
struct xfs_btree_cur *cnt_cur;
xfs_agblock_t gtbno; /* start of right neighbor */
int haveright; /* have a right neighbor */
int i;
int error;
+ struct xfs_perag *pag = agbp->b_pag;
bno_cur = cnt_cur = NULL;
mp = tp->t_mountp;
if (!xfs_rmap_should_skip_owner_update(oinfo)) {
- error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
+ error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
if (error)
goto error0;
}
/*
* Allocate and initialize a cursor for the by-block btree.
*/
- bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
+ bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
/*
* Look for a neighboring block on the left (lower block numbers)
* that is contiguous with this space.
/*
* Now allocate and initialize a cursor for the by-size tree.
*/
- cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
+ cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
/*
* Have both left and right contiguous neighbors.
* Merge all three into a single free block.
/*
* Update the freespace totals in the ag and superblock.
*/
- pag = xfs_perag_get(mp, agno);
- error = xfs_alloc_update_counters(tp, pag, agbp, len);
- xfs_ag_resv_free_extent(pag, type, tp, len);
- xfs_perag_put(pag);
+ error = xfs_alloc_update_counters(tp, agbp, len);
+ xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
if (error)
goto error0;
*/
/*
- * Compute and fill in value of m_ag_maxlevels.
+ * Compute and fill in value of m_alloc_maxlevels.
*/
void
xfs_alloc_compute_maxlevels(
xfs_mount_t *mp) /* file system mount structure */
{
- mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
+ mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
(mp->m_sb.sb_agblocks + 1) / 2);
+ ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
}
/*
const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
unsigned int min_free;
- ASSERT(mp->m_ag_maxlevels > 0);
+ ASSERT(mp->m_alloc_maxlevels > 0);
/* space needed by-bno freespace btree */
min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
- mp->m_ag_maxlevels);
+ mp->m_alloc_maxlevels);
/* space needed by-size freespace btree */
min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
- mp->m_ag_maxlevels);
+ mp->m_alloc_maxlevels);
/* space needed reverse mapping used space btree */
- if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ if (xfs_has_rmapbt(mp))
min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
mp->m_rmap_maxlevels);
}
/*
- * Check the agfl fields of the agf for inconsistency or corruption. The purpose
- * is to detect an agfl header padding mismatch between current and early v5
- * kernels. This problem manifests as a 1-slot size difference between the
- * on-disk flcount and the active [first, last] range of a wrapped agfl. This
- * may also catch variants of agfl count corruption unrelated to padding. Either
- * way, we'll reset the agfl and warn the user.
+ * Check the agfl fields of the agf for inconsistency or corruption.
+ *
+ * The original purpose was to detect an agfl header padding mismatch between
+ * current and early v5 kernels. This problem manifests as a 1-slot size
+ * difference between the on-disk flcount and the active [first, last] range of
+ * a wrapped agfl.
+ *
+ * However, we need to use these same checks to catch agfl count corruptions
+ * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
+ * way, we need to reset the agfl and warn the user.
*
* Return true if a reset is required before the agfl can be used, false
* otherwise.
int agfl_size = xfs_agfl_size(mp);
int active;
- /* no agfl header on v4 supers */
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
-
/*
* The agf read verifier catches severe corruption of these fields.
* Repeat some sanity checks to cover a packed -> unpacked mismatch if
struct xfs_perag *pag)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ struct xfs_agf *agf = agbp->b_addr;
- ASSERT(pag->pagf_agflreset);
+ ASSERT(xfs_perag_agfl_needs_reset(pag));
trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
xfs_warn(mp,
XFS_AGF_FLCOUNT);
pag->pagf_flcount = 0;
- pag->pagf_agflreset = false;
+ clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
}
/*
* Defer an AGFL block free. This is effectively equivalent to
- * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
+ * xfs_free_extent_later() with some special handling particular to AGFL blocks.
*
* Deferring AGFL frees helps prevent log reservation overruns due to too many
* allocation operations in a transaction. AGFL frees are prone to this problem
struct xfs_owner_info *oinfo)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_extent_free_item *new; /* new element */
+ struct xfs_extent_free_item *xefi;
- ASSERT(xfs_bmap_free_item_zone != NULL);
+ ASSERT(xfs_extfree_item_cache != NULL);
ASSERT(oinfo != NULL);
- new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0);
- new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
- new->xefi_blockcount = 1;
- new->xefi_oinfo = *oinfo;
+ xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
+ GFP_KERNEL | __GFP_NOFAIL);
+ xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
+ xefi->xefi_blockcount = 1;
+ xefi->xefi_owner = oinfo->oi_owner;
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
- xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
+ xfs_extent_free_get_group(mp, xefi);
+ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
+}
+
+/*
+ * Add the extent to the list of extents to be free at transaction end.
+ * The list is maintained sorted (by block number).
+ */
+void
+__xfs_free_extent_later(
+ struct xfs_trans *tp,
+ xfs_fsblock_t bno,
+ xfs_filblks_t len,
+ const struct xfs_owner_info *oinfo,
+ bool skip_discard)
+{
+ struct xfs_extent_free_item *xefi;
+ struct xfs_mount *mp = tp->t_mountp;
+#ifdef DEBUG
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+
+ ASSERT(bno != NULLFSBLOCK);
+ ASSERT(len > 0);
+ ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
+ ASSERT(!isnullstartblock(bno));
+ agno = XFS_FSB_TO_AGNO(mp, bno);
+ agbno = XFS_FSB_TO_AGBNO(mp, bno);
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(agbno < mp->m_sb.sb_agblocks);
+ ASSERT(len < mp->m_sb.sb_agblocks);
+ ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
+#endif
+ ASSERT(xfs_extfree_item_cache != NULL);
+
+ xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
+ GFP_KERNEL | __GFP_NOFAIL);
+ xefi->xefi_startblock = bno;
+ xefi->xefi_blockcount = (xfs_extlen_t)len;
+ if (skip_discard)
+ xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
+ if (oinfo) {
+ ASSERT(oinfo->oi_offset == 0);
+
+ if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
+ xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
+ if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
+ xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
+ xefi->xefi_owner = oinfo->oi_owner;
+ } else {
+ xefi->xefi_owner = XFS_RMAP_OWN_NULL;
+ }
+ trace_xfs_bmap_free_defer(mp,
+ XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
+ XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
+
+ xfs_extent_free_get_group(mp, xefi);
+ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
+}
+
+#ifdef DEBUG
+/*
+ * Check if an AGF has a free extent record whose length is equal to
+ * args->minlen.
+ */
+STATIC int
+xfs_exact_minlen_extent_available(
+ struct xfs_alloc_arg *args,
+ struct xfs_buf *agbp,
+ int *stat)
+{
+ struct xfs_btree_cur *cnt_cur;
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
+ int error = 0;
+
+ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
+ args->pag, XFS_BTNUM_CNT);
+ error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
+ if (error)
+ goto out;
+
+ if (*stat == 0) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+
+ error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
+ if (error)
+ goto out;
+
+ if (*stat == 1 && flen != args->minlen)
+ *stat = 0;
+
+out:
+ xfs_btree_del_cursor(cnt_cur, error);
+
+ return error;
}
+#endif
/*
* Decide whether to use this allocation group for this allocation.
/* deferred ops (AGFL block frees) require permanent transactions */
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
- if (!pag->pagf_init) {
- error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
+ if (!xfs_perag_initialised_agf(pag)) {
+ error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
if (error) {
/* Couldn't lock the AGF so skip this AG. */
if (error == -EAGAIN)
* somewhere else if we are not being asked to try harder at this
* point
*/
- if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
+ if (xfs_perag_prefers_metadata(pag) &&
+ (args->datatype & XFS_ALLOC_USERDATA) &&
(flags & XFS_ALLOC_FLAG_TRYLOCK)) {
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
goto out_agbp_relse;
* Can fail if we're not blocking on locks, and it's held.
*/
if (!agbp) {
- error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
+ error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
if (error) {
/* Couldn't lock the AGF so skip this AG. */
if (error == -EAGAIN)
}
/* reset a padding mismatched agfl before final free space check */
- if (pag->pagf_agflreset)
+ if (xfs_perag_agfl_needs_reset(pag))
xfs_agfl_reset(tp, agbp, pag);
/* If there isn't enough total space or single-extent, reject it. */
if (!xfs_alloc_space_available(args, need, flags))
goto out_agbp_relse;
+#ifdef DEBUG
+ if (args->alloc_minlen_only) {
+ int stat;
+
+ error = xfs_exact_minlen_extent_available(args, agbp, &stat);
+ if (error || !stat)
+ goto out_agbp_relse;
+ }
+#endif
/*
* Make the freelist shorter if it's too long.
*
else
targs.oinfo = XFS_RMAP_OINFO_AG;
while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
- error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
+ error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
if (error)
goto out_agbp_relse;
targs.agbp = agbp;
targs.agno = args->agno;
targs.alignment = targs.minlen = targs.prod = 1;
- targs.type = XFS_ALLOCTYPE_THIS_AG;
targs.pag = pag;
- error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
+ error = xfs_alloc_read_agfl(pag, tp, &agflbp);
if (error)
goto out_agbp_relse;
targs.resv = XFS_AG_RESV_AGFL;
/* Allocate as many blocks as possible at once. */
- error = xfs_alloc_ag_vextent(&targs);
+ error = xfs_alloc_ag_vextent_size(&targs);
if (error)
goto out_agflbp_relse;
break;
goto out_agflbp_relse;
}
+
+ if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
+ error = xfs_rmap_alloc(tp, agbp, pag,
+ targs.agbno, targs.len, &targs.oinfo);
+ if (error)
+ goto out_agflbp_relse;
+ }
+ error = xfs_alloc_update_counters(tp, agbp,
+ -((long)(targs.len)));
+ if (error)
+ goto out_agflbp_relse;
+
/*
* Put each allocated block on the list.
*/
for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
- error = xfs_alloc_put_freelist(tp, agbp,
+ error = xfs_alloc_put_freelist(pag, tp, agbp,
agflbp, bno, 0);
if (error)
goto out_agflbp_relse;
* Get a block from the freelist.
* Returns with the buffer for the block gotten.
*/
-int /* error */
+int
xfs_alloc_get_freelist(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_buf_t *agbp, /* buffer containing the agf structure */
- xfs_agblock_t *bnop, /* block address retrieved from freelist */
- int btreeblk) /* destination is a AGF btree */
-{
- xfs_agf_t *agf; /* a.g. freespace structure */
- xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
- xfs_agblock_t bno; /* block number returned */
- __be32 *agfl_bno;
- int error;
- int logflags;
- xfs_mount_t *mp = tp->t_mountp;
- xfs_perag_t *pag; /* per allocation group data */
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agblock_t *bnop,
+ int btreeblk)
+{
+ struct xfs_agf *agf = agbp->b_addr;
+ struct xfs_buf *agflbp;
+ xfs_agblock_t bno;
+ __be32 *agfl_bno;
+ int error;
+ uint32_t logflags;
+ struct xfs_mount *mp = tp->t_mountp;
/*
* Freelist is empty, give up.
*/
- agf = XFS_BUF_TO_AGF(agbp);
if (!agf->agf_flcount) {
*bnop = NULLAGBLOCK;
return 0;
/*
* Read the array of free blocks.
*/
- error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
- &agflbp);
+ error = xfs_alloc_read_agfl(pag, tp, &agflbp);
if (error)
return error;
/*
* Get the block number and update the data structures.
*/
- agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
+ agfl_bno = xfs_buf_to_agfl_bno(agflbp);
bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
+ if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
+ return -EFSCORRUPTED;
+
be32_add_cpu(&agf->agf_flfirst, 1);
xfs_trans_brelse(tp, agflbp);
if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
agf->agf_flfirst = 0;
- pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
- ASSERT(!pag->pagf_agflreset);
+ ASSERT(!xfs_perag_agfl_needs_reset(pag));
be32_add_cpu(&agf->agf_flcount, -1);
- xfs_trans_agflist_delta(tp, -1);
pag->pagf_flcount--;
logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
pag->pagf_btreeblks++;
logflags |= XFS_AGF_BTREEBLKS;
}
- xfs_perag_put(pag);
xfs_alloc_log_agf(tp, agbp, logflags);
*bnop = bno;
*/
void
xfs_alloc_log_agf(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_buf_t *bp, /* buffer for a.g. freelist header */
- int fields) /* mask of fields to be logged (XFS_AGF_...) */
+ struct xfs_trans *tp,
+ struct xfs_buf *bp,
+ uint32_t fields)
{
int first; /* first byte offset */
int last; /* last byte offset */
sizeof(xfs_agf_t)
};
- trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
+ trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}
-/*
- * Interface for inode allocation to force the pag data to be initialized.
- */
-int /* error */
-xfs_alloc_pagf_init(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_agnumber_t agno, /* allocation group number */
- int flags) /* XFS_ALLOC_FLAGS_... */
-{
- xfs_buf_t *bp;
- int error;
-
- error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp);
- if (!error)
- xfs_trans_brelse(tp, bp);
- return error;
-}
-
/*
* Put the block on the freelist for the allocation group.
*/
-int /* error */
+int
xfs_alloc_put_freelist(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_buf_t *agbp, /* buffer for a.g. freelist header */
- xfs_buf_t *agflbp,/* buffer for a.g. free block array */
- xfs_agblock_t bno, /* block being freed */
- int btreeblk) /* block came from a AGF btree */
-{
- xfs_agf_t *agf; /* a.g. freespace structure */
- __be32 *blockp;/* pointer to array entry */
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ struct xfs_buf *agflbp,
+ xfs_agblock_t bno,
+ int btreeblk)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agf *agf = agbp->b_addr;
+ __be32 *blockp;
int error;
- int logflags;
- xfs_mount_t *mp; /* mount structure */
- xfs_perag_t *pag; /* per allocation group data */
+ uint32_t logflags;
__be32 *agfl_bno;
int startoff;
- agf = XFS_BUF_TO_AGF(agbp);
- mp = tp->t_mountp;
+ if (!agflbp) {
+ error = xfs_alloc_read_agfl(pag, tp, &agflbp);
+ if (error)
+ return error;
+ }
- if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
- be32_to_cpu(agf->agf_seqno), &agflbp)))
- return error;
be32_add_cpu(&agf->agf_fllast, 1);
if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
agf->agf_fllast = 0;
- pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
- ASSERT(!pag->pagf_agflreset);
+ ASSERT(!xfs_perag_agfl_needs_reset(pag));
be32_add_cpu(&agf->agf_flcount, 1);
- xfs_trans_agflist_delta(tp, 1);
pag->pagf_flcount++;
logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
pag->pagf_btreeblks--;
logflags |= XFS_AGF_BTREEBLKS;
}
- xfs_perag_put(pag);
xfs_alloc_log_agf(tp, agbp, logflags);
ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
- agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
+ agfl_bno = xfs_buf_to_agfl_bno(agflbp);
blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
*blockp = cpu_to_be32(bno);
startoff = (char *)blockp - (char *)agflbp->b_addr;
return 0;
}
+/*
+ * Verify the AGF is consistent.
+ *
+ * We do not verify the AGFL indexes in the AGF are fully consistent here
+ * because of issues with variable on-disk structure sizes. Instead, we check
+ * the agfl indexes for consistency when we initialise the perag from the AGF
+ * information after a read completes.
+ *
+ * If the index is inconsistent, then we mark the perag as needing an AGFL
+ * reset. The first AGFL update performed then resets the AGFL indexes and
+ * refills the AGFL with known good free blocks, allowing the filesystem to
+ * continue operating normally at the cost of a few leaked free space blocks.
+ */
static xfs_failaddr_t
xfs_agf_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_mount;
- struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
+ struct xfs_agf *agf = bp->b_addr;
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (xfs_has_crc(mp)) {
if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
return __this_address;
- if (!xfs_log_check_lsn(mp,
- be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
return __this_address;
}
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
+ mp->m_alloc_maxlevels ||
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
+ mp->m_alloc_maxlevels)
return __this_address;
- if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+ if (xfs_has_rmapbt(mp) &&
(be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
+ mp->m_rmap_maxlevels))
return __this_address;
- if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+ if (xfs_has_rmapbt(mp) &&
be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
return __this_address;
if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
return __this_address;
- if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
+ if (xfs_has_lazysbcount(mp) &&
be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
return __this_address;
- if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+ if (xfs_has_reflink(mp) &&
be32_to_cpu(agf->agf_refcount_blocks) >
be32_to_cpu(agf->agf_length))
return __this_address;
- if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+ if (xfs_has_reflink(mp) &&
(be32_to_cpu(agf->agf_refcount_level) < 1 ||
- be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
+ be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels))
return __this_address;
return NULL;
-
}
static void
struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
- if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ if (xfs_has_crc(mp) &&
!xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
else {
{
struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
+ struct xfs_agf *agf = bp->b_addr;
xfs_failaddr_t fa;
fa = xfs_agf_verify(bp);
return;
}
- if (!xfs_sb_version_hascrc(&mp->m_sb))
+ if (!xfs_has_crc(mp))
return;
if (bip)
- XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
}
/*
* Read in the allocation group header (free/alloc section).
*/
-int /* error */
+int
xfs_read_agf(
- struct xfs_mount *mp, /* mount point structure */
- struct xfs_trans *tp, /* transaction pointer */
- xfs_agnumber_t agno, /* allocation group number */
- int flags, /* XFS_BUF_ */
- struct xfs_buf **bpp) /* buffer for the ag freelist header */
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ int flags,
+ struct xfs_buf **agfbpp)
{
- int error;
+ struct xfs_mount *mp = pag->pag_mount;
+ int error;
- trace_xfs_read_agf(mp, agno);
+ trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
- ASSERT(agno != NULLAGNUMBER);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
+ XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
+ XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
if (error)
return error;
- ASSERT(!(*bpp)->b_error);
- xfs_buf_set_ref(*bpp, XFS_AGF_REF);
+ xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
return 0;
}
/*
- * Read in the allocation group header (free/alloc section).
+ * Read in the allocation group header (free/alloc section) and initialise the
+ * perag structure if necessary. If the caller provides @agfbpp, then return the
+ * locked buffer to the caller, otherwise free it.
*/
-int /* error */
+int
xfs_alloc_read_agf(
- struct xfs_mount *mp, /* mount point structure */
- struct xfs_trans *tp, /* transaction pointer */
- xfs_agnumber_t agno, /* allocation group number */
- int flags, /* XFS_ALLOC_FLAG_... */
- struct xfs_buf **bpp) /* buffer for the ag freelist header */
-{
- struct xfs_agf *agf; /* ag freelist header */
- struct xfs_perag *pag; /* per allocation group data */
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ int flags,
+ struct xfs_buf **agfbpp)
+{
+ struct xfs_buf *agfbp;
+ struct xfs_agf *agf;
int error;
+ int allocbt_blks;
- trace_xfs_alloc_read_agf(mp, agno);
+ trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);
/* We don't support trylock when freeing. */
ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
- ASSERT(agno != NULLAGNUMBER);
- error = xfs_read_agf(mp, tp, agno,
+ error = xfs_read_agf(pag, tp,
(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
- bpp);
+ &agfbp);
if (error)
return error;
- ASSERT(!(*bpp)->b_error);
- agf = XFS_BUF_TO_AGF(*bpp);
- pag = xfs_perag_get(mp, agno);
- if (!pag->pagf_init) {
+ agf = agfbp->b_addr;
+ if (!xfs_perag_initialised_agf(pag)) {
pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
pag->pagf_levels[XFS_BTNUM_RMAPi] =
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
- pag->pagf_init = 1;
- pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
+ if (xfs_agfl_needs_reset(pag->pag_mount, agf))
+ set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
+ else
+ clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
+
+ /*
+ * Update the in-core allocbt counter. Filter out the rmapbt
+ * subset of the btreeblks counter because the rmapbt is managed
+ * by perag reservation. Subtract one for the rmapbt root block
+ * because the rmap counter includes it while the btreeblks
+ * counter only tracks non-root blocks.
+ */
+ allocbt_blks = pag->pagf_btreeblks;
+ if (xfs_has_rmapbt(pag->pag_mount))
+ allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
+ if (allocbt_blks > 0)
+ atomic64_add(allocbt_blks,
+ &pag->pag_mount->m_allocbt_blks);
+
+ set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
}
#ifdef DEBUG
- else if (!XFS_FORCED_SHUTDOWN(mp)) {
+ else if (!xfs_is_shutdown(pag->pag_mount)) {
ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
}
#endif
- xfs_perag_put(pag);
+ if (agfbpp)
+ *agfbpp = agfbp;
+ else
+ xfs_trans_brelse(tp, agfbp);
return 0;
}
/*
- * Allocate an extent (variable-size).
- * Depending on the allocation type, we either look in a single allocation
- * group or loop over the allocation groups to find the result.
+ * Pre-proces allocation arguments to set initial state that we don't require
+ * callers to set up correctly, as well as bounds check the allocation args
+ * that are set up.
*/
-int /* error */
-xfs_alloc_vextent(
- struct xfs_alloc_arg *args) /* allocation argument structure */
+static int
+xfs_alloc_vextent_check_args(
+ struct xfs_alloc_arg *args,
+ xfs_fsblock_t target,
+ xfs_agnumber_t *minimum_agno)
{
- xfs_agblock_t agsize; /* allocation group size */
- int error;
- int flags; /* XFS_ALLOC_FLAG_... locking flags */
- struct xfs_mount *mp; /* mount structure pointer */
- xfs_agnumber_t sagno; /* starting allocation group number */
- xfs_alloctype_t type; /* input allocation type */
- int bump_rotor = 0;
- xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
-
- mp = args->mp;
- type = args->otype = args->type;
- args->agbno = NULLAGBLOCK;
+ struct xfs_mount *mp = args->mp;
+ xfs_agblock_t agsize;
+
+ args->fsbno = NULLFSBLOCK;
+
+ *minimum_agno = 0;
+ if (args->tp->t_highest_agno != NULLAGNUMBER)
+ *minimum_agno = args->tp->t_highest_agno;
+
/*
* Just fix this up, for the case where the last a.g. is shorter
* (or there's only one a.g.) and the caller couldn't easily figure
args->maxlen = agsize;
if (args->alignment == 0)
args->alignment = 1;
- ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
- ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
+
+ ASSERT(args->minlen > 0);
+ ASSERT(args->maxlen > 0);
+ ASSERT(args->alignment > 0);
+ ASSERT(args->resv != XFS_AG_RESV_AGFL);
+
+ ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
+ ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
ASSERT(args->minlen <= args->maxlen);
ASSERT(args->minlen <= agsize);
ASSERT(args->mod < args->prod);
- if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
- XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
+
+ if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
+ XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
args->minlen > args->maxlen || args->minlen > agsize ||
args->mod >= args->prod) {
- args->fsbno = NULLFSBLOCK;
trace_xfs_alloc_vextent_badargs(args);
+ return -ENOSPC;
+ }
+
+ if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
+ trace_xfs_alloc_vextent_skip_deadlock(args);
+ return -ENOSPC;
+ }
+ return 0;
+
+}
+
+/*
+ * Prepare an AG for allocation. If the AG is not prepared to accept the
+ * allocation, return failure.
+ *
+ * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
+ * modified to hold their own perag references.
+ */
+static int
+xfs_alloc_vextent_prepare_ag(
+ struct xfs_alloc_arg *args,
+ uint32_t flags)
+{
+ bool need_pag = !args->pag;
+ int error;
+
+ if (need_pag)
+ args->pag = xfs_perag_get(args->mp, args->agno);
+
+ args->agbp = NULL;
+ error = xfs_alloc_fix_freelist(args, flags);
+ if (error) {
+ trace_xfs_alloc_vextent_nofix(args);
+ if (need_pag)
+ xfs_perag_put(args->pag);
+ args->agbno = NULLAGBLOCK;
+ return error;
+ }
+ if (!args->agbp) {
+ /* cannot allocate in this AG at all */
+ trace_xfs_alloc_vextent_noagbp(args);
+ args->agbno = NULLAGBLOCK;
return 0;
}
+ args->wasfromfl = 0;
+ return 0;
+}
- switch (type) {
- case XFS_ALLOCTYPE_THIS_AG:
- case XFS_ALLOCTYPE_NEAR_BNO:
- case XFS_ALLOCTYPE_THIS_BNO:
- /*
- * These three force us into a single a.g.
- */
- args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
- args->pag = xfs_perag_get(mp, args->agno);
- error = xfs_alloc_fix_freelist(args, 0);
- if (error) {
- trace_xfs_alloc_vextent_nofix(args);
- goto error0;
- }
- if (!args->agbp) {
- trace_xfs_alloc_vextent_noagbp(args);
+/*
+ * Post-process allocation results to account for the allocation if it succeed
+ * and set the allocated block number correctly for the caller.
+ *
+ * XXX: we should really be returning ENOSPC for ENOSPC, not
+ * hiding it behind a "successful" NULLFSBLOCK allocation.
+ */
+static int
+xfs_alloc_vextent_finish(
+ struct xfs_alloc_arg *args,
+ xfs_agnumber_t minimum_agno,
+ int alloc_error,
+ bool drop_perag)
+{
+ struct xfs_mount *mp = args->mp;
+ int error = 0;
+
+ /*
+ * We can end up here with a locked AGF. If we failed, the caller is
+ * likely going to try to allocate again with different parameters, and
+ * that can widen the AGs that are searched for free space. If we have
+ * to do BMBT block allocation, we have to do a new allocation.
+ *
+ * Hence leaving this function with the AGF locked opens up potential
+ * ABBA AGF deadlocks because a future allocation attempt in this
+ * transaction may attempt to lock a lower number AGF.
+ *
+ * We can't release the AGF until the transaction is commited, so at
+ * this point we must update the "first allocation" tracker to point at
+ * this AG if the tracker is empty or points to a lower AG. This allows
+ * the next allocation attempt to be modified appropriately to avoid
+ * deadlocks.
+ */
+ if (args->agbp &&
+ (args->tp->t_highest_agno == NULLAGNUMBER ||
+ args->agno > minimum_agno))
+ args->tp->t_highest_agno = args->agno;
+
+ /*
+ * If the allocation failed with an error or we had an ENOSPC result,
+ * preserve the returned error whilst also marking the allocation result
+ * as "no extent allocated". This ensures that callers that fail to
+ * capture the error will still treat it as a failed allocation.
+ */
+ if (alloc_error || args->agbno == NULLAGBLOCK) {
+ args->fsbno = NULLFSBLOCK;
+ error = alloc_error;
+ goto out_drop_perag;
+ }
+
+ args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
+
+ ASSERT(args->len >= args->minlen);
+ ASSERT(args->len <= args->maxlen);
+ ASSERT(args->agbno % args->alignment == 0);
+ XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
+
+ /* if not file data, insert new block into the reverse map btree */
+ if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
+ error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
+ args->agbno, args->len, &args->oinfo);
+ if (error)
+ goto out_drop_perag;
+ }
+
+ if (!args->wasfromfl) {
+ error = xfs_alloc_update_counters(args->tp, args->agbp,
+ -((long)(args->len)));
+ if (error)
+ goto out_drop_perag;
+
+ ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
+ args->len));
+ }
+
+ xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
+
+ XFS_STATS_INC(mp, xs_allocx);
+ XFS_STATS_ADD(mp, xs_allocb, args->len);
+
+ trace_xfs_alloc_vextent_finish(args);
+
+out_drop_perag:
+ if (drop_perag && args->pag) {
+ xfs_perag_rele(args->pag);
+ args->pag = NULL;
+ }
+ return error;
+}
+
+/*
+ * Allocate within a single AG only. This uses a best-fit length algorithm so if
+ * you need an exact sized allocation without locality constraints, this is the
+ * fastest way to do it.
+ *
+ * Caller is expected to hold a perag reference in args->pag.
+ */
+int
+xfs_alloc_vextent_this_ag(
+ struct xfs_alloc_arg *args,
+ xfs_agnumber_t agno)
+{
+ struct xfs_mount *mp = args->mp;
+ xfs_agnumber_t minimum_agno;
+ int error;
+
+ ASSERT(args->pag != NULL);
+ ASSERT(args->pag->pag_agno == agno);
+
+ args->agno = agno;
+ args->agbno = 0;
+
+ trace_xfs_alloc_vextent_this_ag(args);
+
+ error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
+ &minimum_agno);
+ if (error) {
+ if (error == -ENOSPC)
+ return 0;
+ return error;
+ }
+
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
+ if (!error && args->agbp)
+ error = xfs_alloc_ag_vextent_size(args);
+
+ return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
+}
+
+/*
+ * Iterate all AGs trying to allocate an extent starting from @start_ag.
+ *
+ * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
+ * allocation attempts in @start_agno have locality information. If we fail to
+ * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
+ * we attempt to allocation in as there is no locality optimisation possible for
+ * those allocations.
+ *
+ * On return, args->pag may be left referenced if we finish before the "all
+ * failed" return point. The allocation finish still needs the perag, and
+ * so the caller will release it once they've finished the allocation.
+ *
+ * When we wrap the AG iteration at the end of the filesystem, we have to be
+ * careful not to wrap into AGs below ones we already have locked in the
+ * transaction if we are doing a blocking iteration. This will result in an
+ * out-of-order locking of AGFs and hence can cause deadlocks.
+ */
+static int
+xfs_alloc_vextent_iterate_ags(
+ struct xfs_alloc_arg *args,
+ xfs_agnumber_t minimum_agno,
+ xfs_agnumber_t start_agno,
+ xfs_agblock_t target_agbno,
+ uint32_t flags)
+{
+ struct xfs_mount *mp = args->mp;
+ xfs_agnumber_t restart_agno = minimum_agno;
+ xfs_agnumber_t agno;
+ int error = 0;
+
+ if (flags & XFS_ALLOC_FLAG_TRYLOCK)
+ restart_agno = 0;
+restart:
+ for_each_perag_wrap_range(mp, start_agno, restart_agno,
+ mp->m_sb.sb_agcount, agno, args->pag) {
+ args->agno = agno;
+ error = xfs_alloc_vextent_prepare_ag(args, flags);
+ if (error)
break;
+ if (!args->agbp) {
+ trace_xfs_alloc_vextent_loopfailed(args);
+ continue;
}
- args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
- if ((error = xfs_alloc_ag_vextent(args)))
- goto error0;
- break;
- case XFS_ALLOCTYPE_START_BNO:
- /*
- * Try near allocation first, then anywhere-in-ag after
- * the first a.g. fails.
- */
- if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
- (mp->m_flags & XFS_MOUNT_32BITINODES)) {
- args->fsbno = XFS_AGB_TO_FSB(mp,
- ((mp->m_agfrotor / rotorstep) %
- mp->m_sb.sb_agcount), 0);
- bump_rotor = 1;
- }
- args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
- args->type = XFS_ALLOCTYPE_NEAR_BNO;
- /* FALLTHROUGH */
- case XFS_ALLOCTYPE_FIRST_AG:
+
/*
- * Rotate through the allocation groups looking for a winner.
+ * Allocation is supposed to succeed now, so break out of the
+ * loop regardless of whether we succeed or not.
*/
- if (type == XFS_ALLOCTYPE_FIRST_AG) {
- /*
- * Start with allocation group given by bno.
- */
- args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
- args->type = XFS_ALLOCTYPE_THIS_AG;
- sagno = 0;
- flags = 0;
+ if (args->agno == start_agno && target_agbno) {
+ args->agbno = target_agbno;
+ error = xfs_alloc_ag_vextent_near(args);
} else {
- /*
- * Start with the given allocation group.
- */
- args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
- flags = XFS_ALLOC_FLAG_TRYLOCK;
- }
- /*
- * Loop over allocation groups twice; first time with
- * trylock set, second time without.
- */
- for (;;) {
- args->pag = xfs_perag_get(mp, args->agno);
- error = xfs_alloc_fix_freelist(args, flags);
- if (error) {
- trace_xfs_alloc_vextent_nofix(args);
- goto error0;
- }
- /*
- * If we get a buffer back then the allocation will fly.
- */
- if (args->agbp) {
- if ((error = xfs_alloc_ag_vextent(args)))
- goto error0;
- break;
- }
-
- trace_xfs_alloc_vextent_loopfailed(args);
-
- /*
- * Didn't work, figure out the next iteration.
- */
- if (args->agno == sagno &&
- type == XFS_ALLOCTYPE_START_BNO)
- args->type = XFS_ALLOCTYPE_THIS_AG;
- /*
- * For the first allocation, we can try any AG to get
- * space. However, if we already have allocated a
- * block, we don't want to try AGs whose number is below
- * sagno. Otherwise, we may end up with out-of-order
- * locking of AGF, which might cause deadlock.
- */
- if (++(args->agno) == mp->m_sb.sb_agcount) {
- if (args->tp->t_firstblock != NULLFSBLOCK)
- args->agno = sagno;
- else
- args->agno = 0;
- }
- /*
- * Reached the starting a.g., must either be done
- * or switch to non-trylock mode.
- */
- if (args->agno == sagno) {
- if (flags == 0) {
- args->agbno = NULLAGBLOCK;
- trace_xfs_alloc_vextent_allfailed(args);
- break;
- }
-
- flags = 0;
- if (type == XFS_ALLOCTYPE_START_BNO) {
- args->agbno = XFS_FSB_TO_AGBNO(mp,
- args->fsbno);
- args->type = XFS_ALLOCTYPE_NEAR_BNO;
- }
- }
- xfs_perag_put(args->pag);
- }
- if (bump_rotor) {
- if (args->agno == sagno)
- mp->m_agfrotor = (mp->m_agfrotor + 1) %
- (mp->m_sb.sb_agcount * rotorstep);
- else
- mp->m_agfrotor = (args->agno * rotorstep + 1) %
- (mp->m_sb.sb_agcount * rotorstep);
+ args->agbno = 0;
+ error = xfs_alloc_ag_vextent_size(args);
}
break;
- default:
- ASSERT(0);
- /* NOTREACHED */
}
- if (args->agbno == NULLAGBLOCK)
- args->fsbno = NULLFSBLOCK;
- else {
- args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
-#ifdef DEBUG
- ASSERT(args->len >= args->minlen);
- ASSERT(args->len <= args->maxlen);
- ASSERT(args->agbno % args->alignment == 0);
- XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
- args->len);
-#endif
+ if (error) {
+ xfs_perag_rele(args->pag);
+ args->pag = NULL;
+ return error;
+ }
+ if (args->agbp)
+ return 0;
+ /*
+ * We didn't find an AG we can alloation from. If we were given
+ * constraining flags by the caller, drop them and retry the allocation
+ * without any constraints being set.
+ */
+ if (flags) {
+ flags = 0;
+ restart_agno = minimum_agno;
+ goto restart;
}
- xfs_perag_put(args->pag);
+
+ ASSERT(args->pag == NULL);
+ trace_xfs_alloc_vextent_allfailed(args);
return 0;
-error0:
- xfs_perag_put(args->pag);
- return error;
+}
+
+/*
+ * Iterate from the AGs from the start AG to the end of the filesystem, trying
+ * to allocate blocks. It starts with a near allocation attempt in the initial
+ * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
+ * back to zero if allowed by previous allocations in this transaction,
+ * otherwise will wrap back to the start AG and run a second blocking pass to
+ * the end of the filesystem.
+ */
+int
+xfs_alloc_vextent_start_ag(
+ struct xfs_alloc_arg *args,
+ xfs_fsblock_t target)
+{
+ struct xfs_mount *mp = args->mp;
+ xfs_agnumber_t minimum_agno;
+ xfs_agnumber_t start_agno;
+ xfs_agnumber_t rotorstep = xfs_rotorstep;
+ bool bump_rotor = false;
+ int error;
+
+ ASSERT(args->pag == NULL);
+
+ args->agno = NULLAGNUMBER;
+ args->agbno = NULLAGBLOCK;
+
+ trace_xfs_alloc_vextent_start_ag(args);
+
+ error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
+ if (error) {
+ if (error == -ENOSPC)
+ return 0;
+ return error;
+ }
+
+ if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
+ xfs_is_inode32(mp)) {
+ target = XFS_AGB_TO_FSB(mp,
+ ((mp->m_agfrotor / rotorstep) %
+ mp->m_sb.sb_agcount), 0);
+ bump_rotor = 1;
+ }
+
+ start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
+ error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
+ XFS_FSB_TO_AGBNO(mp, target), XFS_ALLOC_FLAG_TRYLOCK);
+
+ if (bump_rotor) {
+ if (args->agno == start_agno)
+ mp->m_agfrotor = (mp->m_agfrotor + 1) %
+ (mp->m_sb.sb_agcount * rotorstep);
+ else
+ mp->m_agfrotor = (args->agno * rotorstep + 1) %
+ (mp->m_sb.sb_agcount * rotorstep);
+ }
+
+ return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
+}
+
+/*
+ * Iterate from the agno indicated via @target through to the end of the
+ * filesystem attempting blocking allocation. This does not wrap or try a second
+ * pass, so will not recurse into AGs lower than indicated by the target.
+ */
+int
+xfs_alloc_vextent_first_ag(
+ struct xfs_alloc_arg *args,
+ xfs_fsblock_t target)
+ {
+ struct xfs_mount *mp = args->mp;
+ xfs_agnumber_t minimum_agno;
+ xfs_agnumber_t start_agno;
+ int error;
+
+ ASSERT(args->pag == NULL);
+
+ args->agno = NULLAGNUMBER;
+ args->agbno = NULLAGBLOCK;
+
+ trace_xfs_alloc_vextent_first_ag(args);
+
+ error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
+ if (error) {
+ if (error == -ENOSPC)
+ return 0;
+ return error;
+ }
+
+ start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
+ error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
+ XFS_FSB_TO_AGBNO(mp, target), 0);
+ return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
+}
+
+/*
+ * Allocate at the exact block target or fail. Caller is expected to hold a
+ * perag reference in args->pag.
+ */
+int
+xfs_alloc_vextent_exact_bno(
+ struct xfs_alloc_arg *args,
+ xfs_fsblock_t target)
+{
+ struct xfs_mount *mp = args->mp;
+ xfs_agnumber_t minimum_agno;
+ int error;
+
+ ASSERT(args->pag != NULL);
+ ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
+
+ args->agno = XFS_FSB_TO_AGNO(mp, target);
+ args->agbno = XFS_FSB_TO_AGBNO(mp, target);
+
+ trace_xfs_alloc_vextent_exact_bno(args);
+
+ error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
+ if (error) {
+ if (error == -ENOSPC)
+ return 0;
+ return error;
+ }
+
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
+ if (!error && args->agbp)
+ error = xfs_alloc_ag_vextent_exact(args);
+
+ return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
+}
+
+/*
+ * Allocate an extent as close to the target as possible. If there are not
+ * viable candidates in the AG, then fail the allocation.
+ *
+ * Caller may or may not have a per-ag reference in args->pag.
+ */
+int
+xfs_alloc_vextent_near_bno(
+ struct xfs_alloc_arg *args,
+ xfs_fsblock_t target)
+{
+ struct xfs_mount *mp = args->mp;
+ xfs_agnumber_t minimum_agno;
+ bool needs_perag = args->pag == NULL;
+ int error;
+
+ if (!needs_perag)
+ ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
+
+ args->agno = XFS_FSB_TO_AGNO(mp, target);
+ args->agbno = XFS_FSB_TO_AGBNO(mp, target);
+
+ trace_xfs_alloc_vextent_near_bno(args);
+
+ error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
+ if (error) {
+ if (error == -ENOSPC)
+ return 0;
+ return error;
+ }
+
+ if (needs_perag)
+ args->pag = xfs_perag_grab(mp, args->agno);
+
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
+ if (!error && args->agbp)
+ error = xfs_alloc_ag_vextent_near(args);
+
+ return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
}
/* Ensure that the freelist is at full capacity. */
int
xfs_free_extent_fix_freelist(
struct xfs_trans *tp,
- xfs_agnumber_t agno,
+ struct xfs_perag *pag,
struct xfs_buf **agbp)
{
struct xfs_alloc_arg args;
memset(&args, 0, sizeof(struct xfs_alloc_arg));
args.tp = tp;
args.mp = tp->t_mountp;
- args.agno = agno;
+ args.agno = pag->pag_agno;
+ args.pag = pag;
/*
* validate that the block number is legal - the enables us to detect
if (args.agno >= args.mp->m_sb.sb_agcount)
return -EFSCORRUPTED;
- args.pag = xfs_perag_get(args.mp, args.agno);
- ASSERT(args.pag);
-
error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
if (error)
- goto out;
+ return error;
*agbp = args.agbp;
-out:
- xfs_perag_put(args.pag);
- return error;
+ return 0;
}
/*
int
__xfs_free_extent(
struct xfs_trans *tp,
- xfs_fsblock_t bno,
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
xfs_extlen_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_buf *agbp;
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
+ struct xfs_agf *agf;
int error;
unsigned int busy_flags = 0;
XFS_ERRTAG_FREE_EXTENT))
return -EIO;
- error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
+ error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
if (error)
return error;
+ agf = agbp->b_addr;
if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
error = -EFSCORRUPTED;
- goto err;
+ goto err_release;
}
/* validate the extent size is legal now we have the agf locked */
- if (XFS_IS_CORRUPT(mp,
- agbno + len >
- be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length))) {
+ if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
error = -EFSCORRUPTED;
- goto err;
+ goto err_release;
}
- error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
+ error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
+ type);
if (error)
- goto err;
+ goto err_release;
if (skip_discard)
busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
- xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
+ xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
return 0;
-err:
+err_release:
xfs_trans_brelse(tp, agbp);
return error;
}
STATIC int
xfs_alloc_query_range_helper(
struct xfs_btree_cur *cur,
- union xfs_btree_rec *rec,
+ const union xfs_btree_rec *rec,
void *priv)
{
struct xfs_alloc_query_range_info *query = priv;
struct xfs_alloc_rec_incore irec;
+ xfs_failaddr_t fa;
+
+ xfs_alloc_btrec_to_irec(rec, &irec);
+ fa = xfs_alloc_check_irec(cur, &irec);
+ if (fa)
+ return xfs_alloc_complain_bad_rec(cur, fa, &irec);
- irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
- irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
return query->fn(cur, &irec, query->priv);
}
int
xfs_alloc_query_range(
struct xfs_btree_cur *cur,
- struct xfs_alloc_rec_incore *low_rec,
- struct xfs_alloc_rec_incore *high_rec,
+ const struct xfs_alloc_rec_incore *low_rec,
+ const struct xfs_alloc_rec_incore *high_rec,
xfs_alloc_query_range_fn fn,
void *priv)
{
return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
}
-/* Is there a record covering a given extent? */
+/*
+ * Scan part of the keyspace of the free space and tell us if the area has no
+ * records, is fully mapped by records, or is partially filled.
+ */
int
-xfs_alloc_has_record(
+xfs_alloc_has_records(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
xfs_extlen_t len,
- bool *exists)
+ enum xbtree_recpacking *outcome)
{
union xfs_btree_irec low;
union xfs_btree_irec high;
memset(&high, 0xFF, sizeof(high));
high.a.ar_startblock = bno + len - 1;
- return xfs_btree_has_record(cur, &low, &high, exists);
+ return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
}
/*
unsigned int i;
int error;
- agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
+ agfl_bno = xfs_buf_to_agfl_bno(agflbp);
i = be32_to_cpu(agf->agf_flfirst);
/* Nothing to walk in an empty AGFL. */
return 0;
}
+
+int __init
+xfs_extfree_intent_init_cache(void)
+{
+ xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
+ sizeof(struct xfs_extent_free_item),
+ 0, 0, NULL);
+
+ return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
+}
+
+void
+xfs_extfree_intent_destroy_cache(void)
+{
+ kmem_cache_destroy(xfs_extfree_item_cache);
+ xfs_extfree_item_cache = NULL;
+}