* Copyright (C) 2016 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*/
-#include <libxfs.h>
+#include "libxfs.h"
#include "btree.h"
#include "err_protos.h"
#include "libxlog.h"
#include "dinode.h"
#include "slab.h"
#include "rmap.h"
+#include "libfrog/bitmap.h"
#undef RMAP_DEBUG
};
static struct xfs_ag_rmap *ag_rmaps;
-static bool rmapbt_suspect;
+bool rmapbt_suspect;
static bool refcbt_suspect;
static inline int rmap_compare(const void *a, const void *b)
rmap_needs_work(
struct xfs_mount *mp)
{
- return xfs_sb_version_hasreflink(&mp->m_sb) ||
- xfs_sb_version_hasrmapbt(&mp->m_sb);
+ return xfs_has_reflink(mp) ||
+ xfs_has_rmapbt(mp);
}
/*
{
struct xfs_slab_cursor *cur = NULL;
struct xfs_rmap_irec *prev, *rec;
- size_t old_sz;
+ uint64_t old_sz;
int error = 0;
old_sz = slab_count(ag_rmaps[agno].ar_rmaps);
/* inodes */
ino_rec = findfirst_inode_rec(agno);
for (; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec)) {
- if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
+ if (xfs_has_sparseinodes(mp)) {
startidx = find_first_zero_bit(ino_rec->ir_sparse);
nr = XFS_INODES_PER_CHUNK - popcnt(ino_rec->ir_sparse);
} else {
struct xfs_buf *agbp = NULL;
struct xfs_buf *agflbp = NULL;
struct xfs_trans *tp;
- struct xfs_trans_res tres = {0};
__be32 *agfl_bno, *b;
+ struct xfs_ag_rmap *ag_rmap = &ag_rmaps[agno];
+ struct bitmap *own_ag_bitmap = NULL;
int error = 0;
- struct xfs_owner_info oinfo;
- if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ if (!xfs_has_rmapbt(mp))
return 0;
/* Release the ar_rmaps; they were put into the rmapbt during p5. */
- free_slab(&ag_rmaps[agno].ar_rmaps);
- error = init_slab(&ag_rmaps[agno].ar_rmaps,
- sizeof(struct xfs_rmap_irec));
+ free_slab(&ag_rmap->ar_rmaps);
+ error = init_slab(&ag_rmap->ar_rmaps, sizeof(struct xfs_rmap_irec));
if (error)
goto err;
* rmap, we only need to add rmap records for AGFL blocks past
* that point in the AGFL because those blocks are a result of a
* no-rmap no-shrink freelist fixup that we did earlier.
+ *
+ * However, some blocks end up on the AGFL because the free space
+ * btrees shed blocks as a result of allocating space to fix the
+ * freelist. We already created in-core rmap records for the free
+ * space btree blocks, so we must be careful not to create those
+ * records again. Create a bitmap of already-recorded OWN_AG rmaps.
*/
- agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
- b = agfl_bno + ag_rmaps[agno].ar_flcount;
- while (*b != NULLAGBLOCK && b - agfl_bno < libxfs_agfl_size(mp)) {
- error = rmap_add_ag_rec(mp, agno, be32_to_cpu(*b), 1,
- XFS_RMAP_OWN_AG);
- if (error)
- goto err;
+ error = init_slab_cursor(ag_rmap->ar_raw_rmaps, rmap_compare, &rm_cur);
+ if (error)
+ goto err;
+ error = -bitmap_alloc(&own_ag_bitmap);
+ if (error)
+ goto err_slab;
+ while ((rm_rec = pop_slab_cursor(rm_cur)) != NULL) {
+ if (rm_rec->rm_owner != XFS_RMAP_OWN_AG)
+ continue;
+ error = -bitmap_set(own_ag_bitmap, rm_rec->rm_startblock,
+ rm_rec->rm_blockcount);
+ if (error) {
+ /*
+ * If this range is already set, then the incore rmap
+ * records for the AG free space btrees overlap and
+ * we're toast because that is not allowed.
+ */
+ if (error == EEXIST)
+ error = EFSCORRUPTED;
+ goto err_slab;
+ }
+ }
+ free_slab_cursor(&rm_cur);
+
+ /* Create rmaps for any AGFL blocks that aren't already rmapped. */
+ agfl_bno = xfs_buf_to_agfl_bno(agflbp);
+ b = agfl_bno + ag_rmap->ar_flcount;
+ while (*b != cpu_to_be32(NULLAGBLOCK) &&
+ b - agfl_bno < libxfs_agfl_size(mp)) {
+ xfs_agblock_t agbno;
+
+ agbno = be32_to_cpu(*b);
+ if (!bitmap_test(own_ag_bitmap, agbno, 1)) {
+ error = rmap_add_ag_rec(mp, agno, agbno, 1,
+ XFS_RMAP_OWN_AG);
+ if (error)
+ goto err;
+ }
b++;
}
- libxfs_putbuf(agflbp);
+ libxfs_buf_relse(agflbp);
agflbp = NULL;
+ bitmap_free(&own_ag_bitmap);
/* Merge all the raw rmaps into the main list */
error = rmap_fold_raw_recs(mp, agno);
goto err;
/* Create cursors to refcount structures */
- error = init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare,
- &rm_cur);
+ error = init_slab_cursor(ag_rmap->ar_rmaps, rmap_compare, &rm_cur);
if (error)
goto err;
/* Insert rmaps into the btree one at a time */
rm_rec = pop_slab_cursor(rm_cur);
while (rm_rec) {
- error = -libxfs_trans_alloc(mp, &tres, 16, 0, 0, &tp);
+ struct xfs_owner_info oinfo = {};
+ struct xfs_perag *pag;
+
+ error = -libxfs_trans_alloc_rollable(mp, 16, &tp);
if (error)
goto err_slab;
- error = -libxfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
- if (error)
+ pag = libxfs_perag_get(mp, agno);
+ error = -libxfs_alloc_read_agf(pag, tp, 0, &agbp);
+ if (error) {
+ libxfs_perag_put(pag);
goto err_trans;
+ }
ASSERT(XFS_RMAP_NON_INODE_OWNER(rm_rec->rm_owner));
- libxfs_rmap_ag_owner(&oinfo, rm_rec->rm_owner);
- error = -libxfs_rmap_alloc(tp, agbp, agno, rm_rec->rm_startblock,
+ oinfo.oi_owner = rm_rec->rm_owner;
+ error = -libxfs_rmap_alloc(tp, agbp, pag, rm_rec->rm_startblock,
rm_rec->rm_blockcount, &oinfo);
+ libxfs_perag_put(pag);
if (error)
goto err_trans;
free_slab_cursor(&rm_cur);
err:
if (agflbp)
- libxfs_putbuf(agflbp);
+ libxfs_buf_relse(agflbp);
+ if (own_ag_bitmap)
+ bitmap_free(&own_ag_bitmap);
return error;
}
struct xfs_rmap_irec *rmap;
struct ino_tree_node *irec;
int off;
- size_t idx;
+ uint64_t idx;
xfs_agino_t ino;
if (bag_count(rmaps) < 2)
/*
* Emit a refcount object for refcntbt reconstruction during phase 5.
*/
-#define REFCOUNT_CLAMP(nr) ((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
static void
refcount_emit(
- struct xfs_mount *mp,
+ struct xfs_mount *mp,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
xfs_extlen_t len,
- size_t nr_rmaps)
+ uint64_t nr_rmaps)
{
struct xfs_refcount_irec rlrec;
int error;
agno, agbno, len, nr_rmaps);
rlrec.rc_startblock = agbno;
rlrec.rc_blockcount = len;
- rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
+ nr_rmaps = min(nr_rmaps, MAXREFCOUNT);
+ rlrec.rc_refcount = nr_rmaps;
+ rlrec.rc_domain = XFS_REFC_DOMAIN_SHARED;
+
error = slab_add(rlslab, &rlrec);
if (error)
do_error(
_("Insufficient memory while recreating refcount tree."));
}
-#undef REFCOUNT_CLAMP
/*
* Transform a pile of physical block mapping observations into refcount data
struct xfs_slab_cursor *rmaps_cur;
struct xfs_rmap_irec *array_cur;
struct xfs_rmap_irec *rmap;
+ uint64_t n, idx;
+ uint64_t old_stack_nr;
xfs_agblock_t sbno; /* first bno of this rmap set */
xfs_agblock_t cbno; /* first bno of this refcount set */
xfs_agblock_t nbno; /* next bno where rmap set changes */
- size_t n, idx;
- size_t old_stack_nr;
int error;
- if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ if (!xfs_has_reflink(mp))
return 0;
rmaps = ag_rmaps[agno].ar_rmaps;
/*
* Return the number of rmap objects for an AG.
*/
-size_t
+uint64_t
rmap_record_count(
- struct xfs_mount *mp,
+ struct xfs_mount *mp,
xfs_agnumber_t agno)
{
return slab_count(ag_rmaps[agno].ar_rmaps);
struct xfs_rmap_irec *tmp,
int *have)
{
- int error;
-
/* Use the regular btree retrieval routine. */
- error = -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
- rm_rec->rm_blockcount,
+ return -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
rm_rec->rm_owner, rm_rec->rm_offset,
- rm_rec->rm_flags, have);
- if (error)
- return error;
- if (*have == 0)
- return error;
- return -libxfs_rmap_get_rec(bt_cur, tmp, have);
+ rm_rec->rm_flags, tmp, have);
}
/* Look for an rmap in the rmapbt that matches a given rmap. */
/*
* Compare the observed reverse mappings against what's in the ag btree.
*/
-int
+void
rmaps_verify_btree(
struct xfs_mount *mp,
xfs_agnumber_t agno)
{
+ struct xfs_rmap_irec tmp;
struct xfs_slab_cursor *rm_cur;
struct xfs_btree_cur *bt_cur = NULL;
- int error;
- int have;
struct xfs_buf *agbp = NULL;
struct xfs_rmap_irec *rm_rec;
- struct xfs_rmap_irec tmp;
- struct xfs_perag *pag; /* per allocation group data */
+ struct xfs_perag *pag = NULL;
+ int have;
+ int error;
- if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
- return 0;
+ if (!xfs_has_rmapbt(mp))
+ return;
if (rmapbt_suspect) {
if (no_modify && agno == 0)
do_warn(_("would rebuild corrupt rmap btrees.\n"));
- return 0;
+ return;
}
/* Create cursors to refcount structures */
error = rmap_init_cursor(agno, &rm_cur);
- if (error)
- return error;
+ if (error) {
+ do_warn(_("Not enough memory to check reverse mappings.\n"));
+ return;
+ }
- error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
- if (error)
- goto err;
+ pag = libxfs_perag_get(mp, agno);
+ error = -libxfs_alloc_read_agf(pag, NULL, 0, &agbp);
+ if (error) {
+ do_warn(_("Could not read AGF %u to check rmap btree.\n"),
+ agno);
+ goto err_pag;
+ }
/* Leave the per-ag data "uninitialized" since we rewrite it later */
- pag = libxfs_perag_get(mp, agno);
- pag->pagf_init = 0;
- libxfs_perag_put(pag);
+ clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
- bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, agno);
+ bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, pag);
if (!bt_cur) {
- error = -ENOMEM;
- goto err;
+ do_warn(_("Not enough memory to check reverse mappings.\n"));
+ goto err_agf;
}
rm_rec = pop_slab_cursor(rm_cur);
while (rm_rec) {
error = rmap_lookup(bt_cur, rm_rec, &tmp, &have);
- if (error)
- goto err;
+ if (error) {
+ do_warn(
+_("Could not read reverse-mapping record for (%u/%u).\n"),
+ agno, rm_rec->rm_startblock);
+ goto err_cur;
+ }
+
/*
* Using the range query is expensive, so only do it if
* the regular lookup doesn't find anything or if it doesn't
* match the observed rmap.
*/
- if (xfs_sb_version_hasreflink(&bt_cur->bc_mp->m_sb) &&
+ if (xfs_has_reflink(bt_cur->bc_mp) &&
(!have || !rmap_is_good(rm_rec, &tmp))) {
error = rmap_lookup_overlapped(bt_cur, rm_rec,
&tmp, &have);
- if (error)
- goto err;
+ if (error) {
+ do_warn(
+_("Could not read reverse-mapping record for (%u/%u).\n"),
+ agno, rm_rec->rm_startblock);
+ goto err_cur;
+ }
}
if (!have) {
do_warn(
rm_rec = pop_slab_cursor(rm_cur);
}
-err:
- if (bt_cur)
- libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
- if (agbp)
- libxfs_putbuf(agbp);
+err_cur:
+ libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
+err_agf:
+ libxfs_buf_relse(agbp);
+err_pag:
+ libxfs_perag_put(pag);
free_slab_cursor(&rm_cur);
- return 0;
}
/*
(unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
}
+/*
+ * Inform the user that we're clearing the reflink flag on an inode that
+ * doesn't actually share any blocks. This is an optimization (the kernel
+ * skips refcount checks for non-reflink files) and not a corruption repair,
+ * so we don't need to log every time we clear a flag unless verbose mode is
+ * enabled.
+ */
+/* Emit at most one (or, in verbose mode, per-inode) reflink-clearing notice. */
+static void
+warn_clearing_reflink(
+	xfs_ino_t		ino)
+{
+	/* Set once the one-time summary message has been printed. */
+	static bool		warned = false;
+	/* Serializes the first-warning emission across repair threads. */
+	static pthread_mutex_t	lock = PTHREAD_MUTEX_INITIALIZER;
+
+	/* Verbose mode: report every inode individually and skip the summary. */
+	if (verbose) {
+		do_warn(_("clearing reflink flag on inode %"PRIu64"\n"), ino);
+		return;
+	}
+
+	/*
+	 * Unlocked fast path: once the summary has gone out we can skip the
+	 * mutex entirely.  The flag is re-checked under the lock below.
+	 */
+	if (warned)
+		return;
+
+	pthread_mutex_lock(&lock);
+	/* Double-checked: only the first thread in prints the summary. */
+	if (!warned) {
+		do_warn(_("clearing reflink flag on inodes when possible\n"));
+		warned = true;
+	}
+	pthread_mutex_unlock(&lock);
+}
+
/*
* Fix an inode's reflink flag.
*/
_("setting reflink flag on inode %"PRIu64"\n"),
XFS_AGINO_TO_INO(mp, agno, agino));
else if (!no_modify) /* && !set */
- do_warn(
-_("clearing reflink flag on inode %"PRIu64"\n"),
- XFS_AGINO_TO_INO(mp, agno, agino));
+ warn_clearing_reflink(XFS_AGINO_TO_INO(mp, agno, agino));
if (no_modify)
return 0;
else
dino->di_flags2 &= cpu_to_be64(~XFS_DIFLAG2_REFLINK);
libxfs_dinode_calc_crc(mp, dino);
- libxfs_writebuf(buf, 0);
+ libxfs_buf_mark_dirty(buf);
+ libxfs_buf_relse(buf);
return 0;
}
/*
* Return the number of refcount objects for an AG.
*/
-size_t
+uint64_t
refcount_record_count(
- struct xfs_mount *mp,
+ struct xfs_mount *mp,
xfs_agnumber_t agno)
{
return slab_count(ag_rmaps[agno].ar_refcount_items);
/*
* Compare the observed reference counts against what's in the ag btree.
*/
-int
+void
check_refcounts(
- struct xfs_mount *mp,
- xfs_agnumber_t agno)
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
{
- struct xfs_slab_cursor *rl_cur;
- struct xfs_btree_cur *bt_cur = NULL;
- int error;
- int have;
- int i;
- struct xfs_buf *agbp = NULL;
- struct xfs_refcount_irec *rl_rec;
struct xfs_refcount_irec tmp;
- struct xfs_perag *pag; /* per allocation group data */
+ struct xfs_slab_cursor *rl_cur;
+ struct xfs_btree_cur *bt_cur = NULL;
+ struct xfs_buf *agbp = NULL;
+ struct xfs_perag *pag = NULL;
+ struct xfs_refcount_irec *rl_rec;
+ int have;
+ int i;
+ int error;
- if (!xfs_sb_version_hasreflink(&mp->m_sb))
- return 0;
+ if (!xfs_has_reflink(mp))
+ return;
if (refcbt_suspect) {
if (no_modify && agno == 0)
do_warn(_("would rebuild corrupt refcount btrees.\n"));
- return 0;
+ return;
}
/* Create cursors to refcount structures */
error = init_refcount_cursor(agno, &rl_cur);
- if (error)
- return error;
+ if (error) {
+ do_warn(_("Not enough memory to check refcount data.\n"));
+ return;
+ }
- error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
- if (error)
- goto err;
+ pag = libxfs_perag_get(mp, agno);
+ error = -libxfs_alloc_read_agf(pag, NULL, 0, &agbp);
+ if (error) {
+ do_warn(_("Could not read AGF %u to check refcount btree.\n"),
+ agno);
+ goto err_pag;
+ }
/* Leave the per-ag data "uninitialized" since we rewrite it later */
- pag = libxfs_perag_get(mp, agno);
- pag->pagf_init = 0;
- libxfs_perag_put(pag);
+ clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
- bt_cur = libxfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+ bt_cur = libxfs_refcountbt_init_cursor(mp, NULL, agbp, pag);
if (!bt_cur) {
- error = -ENOMEM;
- goto err;
+ do_warn(_("Not enough memory to check refcount data.\n"));
+ goto err_agf;
}
rl_rec = pop_slab_cursor(rl_cur);
while (rl_rec) {
/* Look for a refcount record in the btree */
error = -libxfs_refcount_lookup_le(bt_cur,
- rl_rec->rc_startblock, &have);
- if (error)
- goto err;
+ XFS_REFC_DOMAIN_SHARED, rl_rec->rc_startblock,
+ &have);
+ if (error) {
+ do_warn(
+_("Could not read reference count record for (%u/%u).\n"),
+ agno, rl_rec->rc_startblock);
+ goto err_cur;
+ }
if (!have) {
do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
}
error = -libxfs_refcount_get_rec(bt_cur, &tmp, &i);
- if (error)
- goto err;
+ if (error) {
+ do_warn(
+_("Could not read reference count record for (%u/%u).\n"),
+ agno, rl_rec->rc_startblock);
+ goto err_cur;
+ }
if (!i) {
do_warn(
_("Missing reference count record for (%u/%u) len %u count %u\n"),
}
/* Compare each refcount observation against the btree's */
- if (tmp.rc_startblock != rl_rec->rc_startblock ||
- tmp.rc_blockcount < rl_rec->rc_blockcount ||
- tmp.rc_refcount < rl_rec->rc_refcount)
+ if (tmp.rc_domain != rl_rec->rc_domain ||
+ tmp.rc_startblock != rl_rec->rc_startblock ||
+ tmp.rc_blockcount != rl_rec->rc_blockcount ||
+ tmp.rc_refcount != rl_rec->rc_refcount) {
+ unsigned int start;
+
+ start = xfs_refcount_encode_startblock(
+ tmp.rc_startblock, tmp.rc_domain);
+
do_warn(
_("Incorrect reference count: saw (%u/%u) len %u nlinks %u; should be (%u/%u) len %u nlinks %u\n"),
- agno, tmp.rc_startblock, tmp.rc_blockcount,
+ agno, start, tmp.rc_blockcount,
tmp.rc_refcount, agno, rl_rec->rc_startblock,
rl_rec->rc_blockcount, rl_rec->rc_refcount);
+ }
next_loop:
rl_rec = pop_slab_cursor(rl_cur);
}
-err:
- if (bt_cur)
- libxfs_btree_del_cursor(bt_cur, error ? XFS_BTREE_ERROR :
- XFS_BTREE_NOERROR);
- if (agbp)
- libxfs_putbuf(agbp);
+err_cur:
+ libxfs_btree_del_cursor(bt_cur, error);
+err_agf:
+ libxfs_buf_relse(agbp);
+err_pag:
+ libxfs_perag_put(pag);
free_slab_cursor(&rl_cur);
- return 0;
}
/*
{
xfs_alloc_arg_t args;
xfs_trans_t *tp;
- struct xfs_trans_res tres = {0};
int flags;
int error;
args.agno = agno;
args.alignment = 1;
args.pag = libxfs_perag_get(mp, agno);
- error = -libxfs_trans_alloc(mp, &tres,
- libxfs_alloc_min_freelist(mp, args.pag), 0, 0, &tp);
+ error = -libxfs_trans_alloc_rollable(mp, 0, &tp);
if (error)
do_error(_("failed to fix AGFL on AG %d, error %d\n"),
agno, error);
do_error(_("failed to fix AGFL on AG %d, error %d\n"),
agno, error);
}
- libxfs_trans_commit(tp);
+ error = -libxfs_trans_commit(tp);
+ if (error)
+ do_error(_("%s: commit failed, error %d\n"), __func__, error);
}
/*
ag_rmaps[agno].ar_flcount = count;
}
+
+/* Estimate the size of the ondisk rmapbt from the incore data. */
+xfs_extlen_t
+estimate_rmapbt_blocks(
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+	struct xfs_ag_rmap	*x;
+	unsigned long long	nr_recs = 0;
+
+	/* No rmapbt on this filesystem means it needs no ondisk space. */
+	if (!rmap_needs_work(mp) || !xfs_has_rmapbt(mp))
+		return 0;
+
+	/*
+	 * Overestimate the amount of space needed by pretending that every
+	 * record in the incore slab will become rmapbt records.
+	 */
+	x = &ag_rmaps[pag->pag_agno];
+	/* Slabs may not have been allocated yet for this AG; count only live ones. */
+	if (x->ar_rmaps)
+		nr_recs += slab_count(x->ar_rmaps);
+	if (x->ar_raw_rmaps)
+		nr_recs += slab_count(x->ar_raw_rmaps);
+
+	/* Convert the record count to an ondisk btree block estimate. */
+	return libxfs_rmapbt_calc_size(mp, nr_recs);
+}
+
+/* Estimate the size of the ondisk refcountbt from the incore data. */
+xfs_extlen_t
+estimate_refcountbt_blocks(
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+	struct xfs_ag_rmap	*x;
+
+	/* No reflink support means no refcount btree and no space needed. */
+	if (!rmap_needs_work(mp) || !xfs_has_reflink(mp))
+		return 0;
+
+	x = &ag_rmaps[pag->pag_agno];
+	/* No incore refcount records collected for this AG yet. */
+	if (!x->ar_refcount_items)
+		return 0;
+
+	/* Convert the incore record count to an ondisk btree block estimate. */
+	return libxfs_refcountbt_calc_size(mp,
+			slab_count(x->ar_refcount_items));
+}