+// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright (C) 2016 Oracle. All Rights Reserved.
- *
* Author: Darrick J. Wong <darrick.wong@oracle.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
-#include <libxfs.h>
+#include "libxfs.h"
#include "btree.h"
#include "err_protos.h"
#include "libxlog.h"
#include "dinode.h"
#include "slab.h"
#include "rmap.h"
+#include "libfrog/bitmap.h"
#undef RMAP_DEBUG
static struct xfs_ag_rmap *ag_rmaps;
static bool rmapbt_suspect;
+static bool refcbt_suspect;
-/*
- * Compare rmap observations for array sorting.
- */
-static int
-rmap_compare(
- const void *a,
- const void *b)
+static inline int rmap_compare(const void *a, const void *b) /* comparator handed to init_slab_cursor() for rmap records */
{
- const struct xfs_rmap_irec *pa;
- const struct xfs_rmap_irec *pb;
- __u64 oa;
- __u64 ob;
-
- pa = a; pb = b;
- oa = libxfs_rmap_irec_offset_pack(pa);
- ob = libxfs_rmap_irec_offset_pack(pb);
-
- if (pa->rm_startblock < pb->rm_startblock)
- return -1;
- else if (pa->rm_startblock > pb->rm_startblock)
- return 1;
- else if (pa->rm_owner < pb->rm_owner)
- return -1;
- else if (pa->rm_owner > pb->rm_owner)
- return 1;
- else if (oa < ob)
- return -1;
- else if (oa > ob)
- return 1;
- else
- return 0;
+ return libxfs_rmap_compare(a, b); /* both operands are forwarded untouched; the ordering is whatever libxfs defines */
}
/*
rmap_needs_work(
struct xfs_mount *mp)
{
- return xfs_sb_version_hasreflink(&mp->m_sb) ||
- xfs_sb_version_hasrmapbt(&mp->m_sb);
+ return xfs_has_reflink(mp) ||
+ xfs_has_rmapbt(mp);
}
/*
static int
find_first_zero_bit(
- __uint64_t mask)
+ uint64_t mask)
{
int n;
int b = 0;
static int
popcnt(
- __uint64_t mask)
+ uint64_t mask)
{
int n;
int b = 0;
/* inodes */
ino_rec = findfirst_inode_rec(agno);
for (; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec)) {
- if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
+ if (xfs_has_sparseinodes(mp)) {
startidx = find_first_zero_bit(ino_rec->ir_sparse);
nr = XFS_INODES_PER_CHUNK - popcnt(ino_rec->ir_sparse);
} else {
struct xfs_buf *agbp = NULL;
struct xfs_buf *agflbp = NULL;
struct xfs_trans *tp;
- struct xfs_trans_res tres = {0};
__be32 *agfl_bno, *b;
+ struct xfs_ag_rmap *ag_rmap = &ag_rmaps[agno];
+ struct bitmap *own_ag_bitmap = NULL;
int error = 0;
- struct xfs_owner_info oinfo;
- if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ if (!xfs_has_rmapbt(mp))
return 0;
/* Release the ar_rmaps; they were put into the rmapbt during p5. */
- free_slab(&ag_rmaps[agno].ar_rmaps);
- error = init_slab(&ag_rmaps[agno].ar_rmaps,
- sizeof(struct xfs_rmap_irec));
+ free_slab(&ag_rmap->ar_rmaps);
+ error = init_slab(&ag_rmap->ar_rmaps, sizeof(struct xfs_rmap_irec));
if (error)
goto err;
* rmap, we only need to add rmap records for AGFL blocks past
* that point in the AGFL because those blocks are a result of a
* no-rmap no-shrink freelist fixup that we did earlier.
+ *
+ * However, some blocks end up on the AGFL because the free space
+ * btrees shed blocks as a result of allocating space to fix the
+ * freelist. We already created in-core rmap records for the free
+ * space btree blocks, so we must be careful not to create those
+ * records again. Create a bitmap of already-recorded OWN_AG rmaps.
*/
- agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
- b = agfl_bno + ag_rmaps[agno].ar_flcount;
- while (*b != NULLAGBLOCK && b - agfl_bno < XFS_AGFL_SIZE(mp)) {
- error = rmap_add_ag_rec(mp, agno, be32_to_cpu(*b), 1,
- XFS_RMAP_OWN_AG);
- if (error)
- goto err;
+ error = init_slab_cursor(ag_rmap->ar_raw_rmaps, rmap_compare, &rm_cur);
+ if (error)
+ goto err;
+ error = -bitmap_alloc(&own_ag_bitmap);
+ if (error)
+ goto err_slab;
+ while ((rm_rec = pop_slab_cursor(rm_cur)) != NULL) {
+ if (rm_rec->rm_owner != XFS_RMAP_OWN_AG)
+ continue;
+ error = -bitmap_set(own_ag_bitmap, rm_rec->rm_startblock,
+ rm_rec->rm_blockcount);
+ if (error) {
+ /*
+ * If this range is already set, then the incore rmap
+ * records for the AG free space btrees overlap and
+ * we're toast because that is not allowed.
+ */
+ if (error == EEXIST)
+ error = EFSCORRUPTED;
+ goto err_slab;
+ }
+ }
+ free_slab_cursor(&rm_cur);
+
+ /* Create rmaps for any AGFL blocks that aren't already rmapped. */
+ agfl_bno = xfs_buf_to_agfl_bno(agflbp);
+ b = agfl_bno + ag_rmap->ar_flcount;
+ while (*b != cpu_to_be32(NULLAGBLOCK) &&
+ b - agfl_bno < libxfs_agfl_size(mp)) {
+ xfs_agblock_t agbno;
+
+ agbno = be32_to_cpu(*b);
+ if (!bitmap_test(own_ag_bitmap, agbno, 1)) {
+ error = rmap_add_ag_rec(mp, agno, agbno, 1,
+ XFS_RMAP_OWN_AG);
+ if (error)
+ goto err;
+ }
b++;
}
- libxfs_putbuf(agflbp);
+ libxfs_buf_relse(agflbp);
agflbp = NULL;
+ bitmap_free(&own_ag_bitmap);
/* Merge all the raw rmaps into the main list */
error = rmap_fold_raw_recs(mp, agno);
goto err;
/* Create cursors to refcount structures */
- error = init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare,
- &rm_cur);
+ error = init_slab_cursor(ag_rmap->ar_rmaps, rmap_compare, &rm_cur);
if (error)
goto err;
/* Insert rmaps into the btree one at a time */
rm_rec = pop_slab_cursor(rm_cur);
while (rm_rec) {
- error = -libxfs_trans_alloc(mp, &tres, 16, 0, 0, &tp);
+ struct xfs_owner_info oinfo = {};
+ struct xfs_perag *pag;
+
+ error = -libxfs_trans_alloc_rollable(mp, 16, &tp);
if (error)
goto err_slab;
- error = -libxfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
- if (error)
+ pag = libxfs_perag_get(mp, agno);
+ error = -libxfs_alloc_read_agf(pag, tp, 0, &agbp);
+ if (error) {
+ libxfs_perag_put(pag);
goto err_trans;
+ }
ASSERT(XFS_RMAP_NON_INODE_OWNER(rm_rec->rm_owner));
- libxfs_rmap_ag_owner(&oinfo, rm_rec->rm_owner);
- error = -libxfs_rmap_alloc(tp, agbp, agno, rm_rec->rm_startblock,
+ oinfo.oi_owner = rm_rec->rm_owner;
+ error = -libxfs_rmap_alloc(tp, agbp, pag, rm_rec->rm_startblock,
rm_rec->rm_blockcount, &oinfo);
+ libxfs_perag_put(pag);
if (error)
goto err_trans;
free_slab_cursor(&rm_cur);
err:
if (agflbp)
- libxfs_putbuf(agflbp);
+ libxfs_buf_relse(agflbp);
+ if (own_ag_bitmap)
+ bitmap_free(&own_ag_bitmap);
return error;
}
rlrec.rc_startblock = agbno;
rlrec.rc_blockcount = len;
rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
+ rlrec.rc_domain = XFS_REFC_DOMAIN_SHARED;
+
error = slab_add(rlslab, &rlrec);
if (error)
do_error(
size_t old_stack_nr;
int error;
- if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ if (!xfs_has_reflink(mp))
return 0;
rmaps = ag_rmaps[agno].ar_rmaps;
mark_inode_rl(mp, stack_top);
/* Set nbno to the bno of the next refcount change */
- if (n < slab_count(rmaps))
+ if (n < slab_count(rmaps) && array_cur)
nbno = array_cur->rm_startblock;
else
nbno = NULLAGBLOCK;
struct xfs_rmap_irec *tmp,
int *have)
{
- int error;
-
/* Use the regular btree retrieval routine. */
- error = -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
- rm_rec->rm_blockcount,
+ return -libxfs_rmap_lookup_le(bt_cur, rm_rec->rm_startblock,
rm_rec->rm_owner, rm_rec->rm_offset,
- rm_rec->rm_flags, have);
- if (error)
- return error;
- if (*have == 0)
- return error;
- return -libxfs_rmap_get_rec(bt_cur, tmp, have);
+ rm_rec->rm_flags, tmp, have);
+}
+
+/*
+ * Look for an rmap in the rmapbt that matches a given rmap, using the
+ * range-query flavor of the lookup (libxfs_rmap_lookup_le_range).
+ *
+ * bt_cur: rmapbt cursor to search with
+ * rm_rec: observed rmap; its startblock, owner, offset and flags form the
+ *         search key (rm_blockcount is not passed down)
+ * tmp:    handed straight to libxfs; presumably receives the record found
+ * have:   handed straight to libxfs; the caller in this file treats zero
+ *         as "no record found"
+ *
+ * Returns the libxfs result with its sign flipped; the caller in this file
+ * treats any nonzero value as a failed lookup.
+ */
+static int
+rmap_lookup_overlapped(
+ struct xfs_btree_cur *bt_cur,
+ struct xfs_rmap_irec *rm_rec,
+ struct xfs_rmap_irec *tmp,
+ int *have)
+{
+ /* Overlapped rmaps need the range-query variant of lookup_le */
+ return -libxfs_rmap_lookup_le_range(bt_cur, rm_rec->rm_startblock,
+ rm_rec->rm_owner, rm_rec->rm_offset,
+ rm_rec->rm_flags, tmp, have);
}
/* Does the btree rmap cover the observed rmap? */
/*
* Compare the observed reverse mappings against what's in the ag btree.
*/
-int
+void
rmaps_verify_btree(
struct xfs_mount *mp,
xfs_agnumber_t agno)
{
+ struct xfs_rmap_irec tmp;
struct xfs_slab_cursor *rm_cur;
struct xfs_btree_cur *bt_cur = NULL;
- int error;
- int have;
struct xfs_buf *agbp = NULL;
struct xfs_rmap_irec *rm_rec;
- struct xfs_rmap_irec tmp;
- struct xfs_perag *pag; /* per allocation group data */
+ struct xfs_perag *pag = NULL;
+ int have;
+ int error;
- if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
- return 0;
+ if (!xfs_has_rmapbt(mp))
+ return;
if (rmapbt_suspect) {
if (no_modify && agno == 0)
do_warn(_("would rebuild corrupt rmap btrees.\n"));
- return 0;
+ return;
}
/* Create cursors to refcount structures */
error = rmap_init_cursor(agno, &rm_cur);
- if (error)
- return error;
+ if (error) {
+ do_warn(_("Not enough memory to check reverse mappings.\n"));
+ return;
+ }
- error = -libxfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
- if (error)
- goto err;
+ pag = libxfs_perag_get(mp, agno);
+ error = -libxfs_alloc_read_agf(pag, NULL, 0, &agbp);
+ if (error) {
+ do_warn(_("Could not read AGF %u to check rmap btree.\n"),
+ agno);
+ goto err_pag;
+ }
/* Leave the per-ag data "uninitialized" since we rewrite it later */
- pag = libxfs_perag_get(mp, agno);
- pag->pagf_init = 0;
- libxfs_perag_put(pag);
+ clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
- bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, agno);
+ bt_cur = libxfs_rmapbt_init_cursor(mp, NULL, agbp, pag);
if (!bt_cur) {
- error = -ENOMEM;
- goto err;
+ do_warn(_("Not enough memory to check reverse mappings.\n"));
+ goto err_agf;
}
rm_rec = pop_slab_cursor(rm_cur);
while (rm_rec) {
error = rmap_lookup(bt_cur, rm_rec, &tmp, &have);
- if (error)
- goto err;
+ if (error) {
+ do_warn(
+_("Could not read reverse-mapping record for (%u/%u).\n"),
+ agno, rm_rec->rm_startblock);
+ goto err_cur;
+ }
+
+ /*
+ * Using the range query is expensive, so only do it if
+ * the regular lookup doesn't find anything or if it doesn't
+ * match the observed rmap.
+ */
+ if (xfs_has_reflink(bt_cur->bc_mp) &&
+ (!have || !rmap_is_good(rm_rec, &tmp))) {
+ error = rmap_lookup_overlapped(bt_cur, rm_rec,
+ &tmp, &have);
+ if (error) {
+ do_warn(
+_("Could not read reverse-mapping record for (%u/%u).\n"),
+ agno, rm_rec->rm_startblock);
+ goto err_cur;
+ }
+ }
if (!have) {
do_warn(
_("Missing reverse-mapping record for (%u/%u) %slen %u owner %"PRId64" \
rm_rec = pop_slab_cursor(rm_cur);
}
-err:
- if (bt_cur)
- libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
- if (agbp)
- libxfs_putbuf(agbp);
+err_cur:
+ libxfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
+err_agf:
+ libxfs_buf_relse(agbp);
+err_pag:
+ libxfs_perag_put(pag);
free_slab_cursor(&rm_cur);
- return 0;
}
/*
* Compare the key fields of two rmap records -- positive if key1 > key2,
* negative if key1 < key2, and zero if equal.
*/
-__int64_t
+int64_t
rmap_diffkeys(
struct xfs_rmap_irec *kp1,
struct xfs_rmap_irec *kp2)
{
__u64 oa;
__u64 ob;
- __int64_t d;
+ int64_t d;
struct xfs_rmap_irec tmp;
tmp = *kp1;
tmp.rm_flags &= ~XFS_RMAP_REC_FLAGS;
ob = libxfs_rmap_irec_offset_pack(&tmp);
- d = (__int64_t)kp1->rm_startblock - kp2->rm_startblock;
+ d = (int64_t)kp1->rm_startblock - kp2->rm_startblock;
if (d)
return d;
(unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
}
+/*
+ * Inform the user that we're clearing the reflink flag on an inode that
+ * doesn't actually share any blocks.  This is an optimization (the kernel
+ * skips refcount checks for non-reflink files) and not a corruption repair,
+ * so we don't need to log every time we clear a flag unless verbose mode is
+ * enabled.
+ *
+ * In verbose mode every call logs the inode number.  Otherwise one generic
+ * message is printed the first time this is reached and later calls are
+ * silent.
+ *
+ * The once-only latch is read and written only with the mutex held.  Peeking
+ * at the plain bool outside the lock while another thread may be setting it
+ * would be a data race, so there is deliberately no unlocked fast path.
+ *
+ * ino: inode number reported in the verbose message.
+ */
+static void
+warn_clearing_reflink(
+	xfs_ino_t		ino)
+{
+	static bool		warned = false;
+	static pthread_mutex_t	lock = PTHREAD_MUTEX_INITIALIZER;
+
+	if (verbose) {
+		do_warn(_("clearing reflink flag on inode %"PRIu64"\n"), ino);
+		return;
+	}
+
+	/* Test-and-set under the lock so exactly one caller emits the notice. */
+	pthread_mutex_lock(&lock);
+	if (!warned) {
+		do_warn(_("clearing reflink flag on inodes when possible\n"));
+		warned = true;
+	}
+	pthread_mutex_unlock(&lock);
+}
+
/*
* Fix an inode's reflink flag.
*/
_("setting reflink flag on inode %"PRIu64"\n"),
XFS_AGINO_TO_INO(mp, agno, agino));
else if (!no_modify) /* && !set */
- do_warn(
-_("clearing reflink flag on inode %"PRIu64"\n"),
- XFS_AGINO_TO_INO(mp, agno, agino));
+ warn_clearing_reflink(XFS_AGINO_TO_INO(mp, agno, agino));
if (no_modify)
return 0;
else
dino->di_flags2 &= cpu_to_be64(~XFS_DIFLAG2_REFLINK);
libxfs_dinode_calc_crc(mp, dino);
- libxfs_writebuf(buf, 0);
+ libxfs_buf_mark_dirty(buf);
+ libxfs_buf_relse(buf);
return 0;
}
{
struct ino_tree_node *irec;
int bit;
- __uint64_t was;
- __uint64_t is;
- __uint64_t diff;
- __uint64_t mask;
+ uint64_t was;
+ uint64_t is;
+ uint64_t diff;
+ uint64_t mask;
int error = 0;
xfs_agino_t agino;
return error;
}
+/*
+ * Return the number of refcount objects for an AG.
+ *
+ * The value is slab_count() of ag_rmaps[agno].ar_refcount_items.  mp is
+ * accepted but not consulted, and agno indexes ag_rmaps directly with no
+ * range check in this function.
+ */
+size_t
+refcount_record_count(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ return slab_count(ag_rmaps[agno].ar_refcount_items);
+}
+
+/*
+ * Return a slab cursor that will return refcount objects in order.
+ *
+ * The cursor is created over ag_rmaps[agno].ar_refcount_items with a NULL
+ * comparison function, so "in order" presumably means the order in which
+ * the records already sit in the slab -- TODO confirm against
+ * init_slab_cursor().
+ *
+ * agno: AG whose refcount slab is walked; indexes ag_rmaps directly
+ * cur:  where the new cursor is stored
+ *
+ * Returns the result of init_slab_cursor() unchanged; check_refcounts()
+ * treats nonzero as failure and releases the cursor with
+ * free_slab_cursor().
+ */
+int
+init_refcount_cursor(
+ xfs_agnumber_t agno,
+ struct xfs_slab_cursor **cur)
+{
+ return init_slab_cursor(ag_rmaps[agno].ar_refcount_items, NULL, cur);
+}
+
+/*
+ * Disable the refcount btree check.
+ *
+ * Sets the file-scope refcbt_suspect flag, which check_refcounts() tests
+ * before doing any comparison work.
+ */
+void
+refcount_avoid_check(void)
+{
+ refcbt_suspect = true;
+}
+
+/*
+ * Compare the observed reference counts against what's in the ag btree.
+ *
+ * Walks the in-core refcount records collected for AG agno and, for each
+ * one, looks up a shared-domain record in the refcount btree with
+ * libxfs_refcount_lookup_le() keyed on the same start block.  A record with
+ * no btree counterpart, or one whose domain, start block, length or
+ * refcount differs, is reported with do_warn().
+ *
+ * Does nothing if the filesystem has no reflink feature.  If the refcount
+ * btree was already flagged suspect, the comparison is skipped and a single
+ * "would rebuild" notice is printed (only for AG 0 in no_modify mode).
+ *
+ * Setup and read failures are reported with do_warn() and end the check for
+ * this AG early; there is no return value.
+ *
+ * mp:   mount whose feature bits and AG are examined
+ * agno: allocation group to check
+ */
+void
+check_refcounts(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_refcount_irec tmp;
+ struct xfs_slab_cursor *rl_cur;
+ struct xfs_btree_cur *bt_cur = NULL;
+ struct xfs_buf *agbp = NULL;
+ struct xfs_perag *pag = NULL;
+ struct xfs_refcount_irec *rl_rec;
+ int have;
+ int i;
+ int error;
+
+ if (!xfs_has_reflink(mp))
+ return;
+ if (refcbt_suspect) {
+ if (no_modify && agno == 0)
+ do_warn(_("would rebuild corrupt refcount btrees.\n"));
+ return;
+ }
+
+ /* Set up a cursor over the in-core refcount records for this AG */
+ error = init_refcount_cursor(agno, &rl_cur);
+ if (error) {
+ do_warn(_("Not enough memory to check refcount data.\n"));
+ return;
+ }
+
+ pag = libxfs_perag_get(mp, agno);
+ error = -libxfs_alloc_read_agf(pag, NULL, 0, &agbp);
+ if (error) {
+ do_warn(_("Could not read AGF %u to check refcount btree.\n"),
+ agno);
+ goto err_pag;
+ }
+
+ /* Leave the per-ag data "uninitialized" since we rewrite it later */
+ clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
+
+ bt_cur = libxfs_refcountbt_init_cursor(mp, NULL, agbp, pag);
+ if (!bt_cur) {
+ do_warn(_("Not enough memory to check refcount data.\n"));
+ goto err_agf;
+ }
+
+ rl_rec = pop_slab_cursor(rl_cur);
+ while (rl_rec) {
+ /* Look for a shared-domain refcount record in the btree */
+ error = -libxfs_refcount_lookup_le(bt_cur,
+ XFS_REFC_DOMAIN_SHARED, rl_rec->rc_startblock,
+ &have);
+ if (error) {
+ do_warn(
+_("Could not read reference count record for (%u/%u).\n"),
+ agno, rl_rec->rc_startblock);
+ goto err_cur;
+ }
+ if (!have) { /* the lookup itself found nothing */
+ do_warn(
+_("Missing reference count record for (%u/%u) len %u count %u\n"),
+ agno, rl_rec->rc_startblock,
+ rl_rec->rc_blockcount, rl_rec->rc_refcount);
+ goto next_loop;
+ }
+
+ error = -libxfs_refcount_get_rec(bt_cur, &tmp, &i);
+ if (error) {
+ do_warn(
+_("Could not read reference count record for (%u/%u).\n"),
+ agno, rl_rec->rc_startblock);
+ goto err_cur;
+ }
+ if (!i) { /* lookup succeeded but no record could be fetched */
+ do_warn(
+_("Missing reference count record for (%u/%u) len %u count %u\n"),
+ agno, rl_rec->rc_startblock,
+ rl_rec->rc_blockcount, rl_rec->rc_refcount);
+ goto next_loop;
+ }
+
+ /*
+ * Compare each refcount observation against the btree's.  Any
+ * differing field is a mismatch; the btree record's start block
+ * is printed in its encoded form (start block combined with
+ * domain).
+ */
+ if (tmp.rc_domain != rl_rec->rc_domain ||
+ tmp.rc_startblock != rl_rec->rc_startblock ||
+ tmp.rc_blockcount != rl_rec->rc_blockcount ||
+ tmp.rc_refcount != rl_rec->rc_refcount) {
+ unsigned int start;
+
+ start = xfs_refcount_encode_startblock(
+ tmp.rc_startblock, tmp.rc_domain);
+
+ do_warn(
+_("Incorrect reference count: saw (%u/%u) len %u nlinks %u; should be (%u/%u) len %u nlinks %u\n"),
+ agno, start, tmp.rc_blockcount,
+ tmp.rc_refcount, agno, rl_rec->rc_startblock,
+ rl_rec->rc_blockcount, rl_rec->rc_refcount);
+ }
+next_loop:
+ rl_rec = pop_slab_cursor(rl_cur);
+ }
+
+err_cur:
+ libxfs_btree_del_cursor(bt_cur, error); /* error is 0 here after a clean walk */
+err_agf:
+ libxfs_buf_relse(agbp);
+err_pag:
+ libxfs_perag_put(pag);
+ free_slab_cursor(&rl_cur);
+}
+
/*
* Regenerate the AGFL so that we don't run out of it while rebuilding the
* rmap btree. If skip_rmapbt is true, don't update the rmapbt (most probably
{
xfs_alloc_arg_t args;
xfs_trans_t *tp;
- struct xfs_trans_res tres = {0};
int flags;
int error;
args.agno = agno;
args.alignment = 1;
args.pag = libxfs_perag_get(mp, agno);
- error = -libxfs_trans_alloc(mp, &tres,
- libxfs_alloc_min_freelist(mp, args.pag), 0, 0, &tp);
+ error = -libxfs_trans_alloc_rollable(mp, 0, &tp);
if (error)
do_error(_("failed to fix AGFL on AG %d, error %d\n"),
agno, error);
do_error(_("failed to fix AGFL on AG %d, error %d\n"),
agno, error);
}
- libxfs_trans_commit(tp);
+ error = -libxfs_trans_commit(tp);
+ if (error)
+ do_error(_("%s: commit failed, error %d\n"), __func__, error);
}
/*