summaryrefslogtreecommitdiff
path: root/fs/xfs/scrub
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-09-01 11:19:47 -0700
committerDarrick J. Wong <djwong@kernel.org>2021-12-15 17:29:25 -0800
commite54775010c76c63a8986ec9e40272782a20256aa (patch)
treeeeb31eb1aab4a5de3c1e3451ae1fcaa779e110f4 /fs/xfs/scrub
parentfff27f913157bbf72ca0e09e508afab2c2272d3e (diff)
xfs: online repair of the realtime refcount btree
Port the data device's refcount btree repair code to the realtime refcount btree. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs/xfs/scrub')
-rw-r--r--fs/xfs/scrub/bmap_repair.c2
-rw-r--r--fs/xfs/scrub/repair.c15
-rw-r--r--fs/xfs/scrub/repair.h7
-rw-r--r--fs/xfs/scrub/rtrefcount_repair.c741
-rw-r--r--fs/xfs/scrub/rtrmap_repair.c2
-rw-r--r--fs/xfs/scrub/scrub.c2
-rw-r--r--fs/xfs/scrub/trace.h25
7 files changed, 787 insertions, 7 deletions
diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c
index d3ed1c884353..2256b77e20fc 100644
--- a/fs/xfs/scrub/bmap_repair.c
+++ b/fs/xfs/scrub/bmap_repair.c
@@ -285,7 +285,7 @@ xrep_bmap_check_rtfork_rmap(
/* Make sure this isn't free space. */
return xrep_require_rtext_inuse(sc, rec->rm_startblock,
- rec->rm_blockcount);
+ rec->rm_blockcount, false);
}
/* Record realtime extents that belong to this inode's fork. */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 641b3c8c4e40..c44283b0a502 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -2246,12 +2246,16 @@ xrep_dotdot_lookup(
}
#ifdef CONFIG_XFS_RT
-/* Ensure that all rt blocks in the given range are not marked free. */
+/*
+ * Ensure that all rt blocks in the given range are not marked free or
+ * misaligned.
+ */
int
xrep_require_rtext_inuse(
struct xfs_scrub *sc,
xfs_rtblock_t rtbno,
- xfs_filblks_t len)
+ xfs_filblks_t len,
+ bool must_align)
{
struct xfs_mount *mp = sc->mp;
xfs_rtblock_t startext;
@@ -2262,7 +2266,12 @@ xrep_require_rtext_inuse(
/* Round the starting rt extent down and the end rt extent up. */
startext = div_u64_rem(rtbno, mp->m_sb.sb_rextsize, &mod);
+ if (must_align && mod != 0)
+ return -EFSCORRUPTED;
+
endext = div_u64_rem(rtbno + len - 1, mp->m_sb.sb_rextsize, &mod);
+ if (must_align && mod != mp->m_sb.sb_rextsize - 1)
+ return -EFSCORRUPTED;
error = xfs_rtalloc_extent_is_free(mp, sc->tp, startext,
endext - startext + 1, &is_free);
@@ -2293,6 +2302,8 @@ xrep_is_rtmeta_ino(
/* Newer rt metadata files are not guaranteed to exist */
if (sc->mp->m_rrmapip && ino == sc->mp->m_rrmapip->i_ino)
return true;
+ if (sc->mp->m_rrefcountip && ino == sc->mp->m_rrefcountip->i_ino)
+ return true;
return false;
}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 829d1df0bbdb..74216b2ee7c7 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -97,9 +97,9 @@ int xrep_check_ino_btree_mapping(struct xfs_scrub *sc,
#ifdef CONFIG_XFS_RT
int xrep_require_rtext_inuse(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
- xfs_filblks_t len);
+ xfs_filblks_t len, bool must_align);
#else
-# define xrep_require_rtext_inuse(sc, rtbno, len) (-ENOSYS)
+# define xrep_require_rtext_inuse(sc, rtbno, len, align) (-ENOSYS)
#endif
bool xrep_is_rtmeta_ino(struct xfs_scrub *sc, xfs_ino_t ino);
@@ -159,10 +159,12 @@ int xrep_quotacheck(struct xfs_scrub *sc);
int xrep_rtsummary(struct xfs_scrub *sc);
int xrep_rtbitmap(struct xfs_scrub *sc);
int xrep_rtrmapbt(struct xfs_scrub *sc);
+int xrep_rtrefcountbt(struct xfs_scrub *sc);
#else
# define xrep_rtsummary xrep_notsupported
# define xrep_rtbitmap xrep_notsupported
# define xrep_rtrmapbt xrep_notsupported
+# define xrep_rtrefcountbt xrep_notsupported
#endif /* CONFIG_XFS_RT */
struct xrep_newbt_resv {
@@ -308,6 +310,7 @@ xrep_setup_rtsummary(struct xfs_scrub *sc, unsigned int *whatever)
#define xrep_parent xrep_notsupported
#define xrep_rtbitmap xrep_notsupported
#define xrep_rtrmapbt xrep_notsupported
+#define xrep_rtrefcountbt xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rtrefcount_repair.c b/fs/xfs/scrub/rtrefcount_repair.c
new file mode 100644
index 000000000000..6b0cb3dcaa4b
--- /dev/null
+++ b/fs/xfs/scrub/rtrefcount_repair.c
@@ -0,0 +1,741 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_rtrefcount_btree.h"
+#include "xfs_error.h"
+#include "xfs_health.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_rtalloc.h"
+#include "xfs_ag.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/bitmap.h"
+#include "scrub/xfarray.h"
+
+/*
+ * Rebuilding the Reference Count Btree
+ * ====================================
+ *
+ * This algorithm is "borrowed" from xfs_repair. Imagine the rmap
+ * entries as rectangles representing extents of physical blocks, and
+ * that the rectangles can be laid down to allow them to overlap each
+ * other; then we know that we must emit a refcnt btree entry wherever
+ * the amount of overlap changes, i.e. the emission stimulus is
+ * level-triggered:
+ *
+ * - ---
+ * -- ----- ---- --- ------
+ * -- ---- ----------- ---- ---------
+ * -------------------------------- -----------
+ * ^ ^ ^^ ^^ ^ ^^ ^^^ ^^^^ ^ ^^ ^ ^ ^
+ * 2 1 23 21 3 43 234 2123 1 01 2 3 0
+ *
+ * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
+ *
+ * Note that in the actual refcnt btree we don't store the refcount < 2
+ * cases because the bnobt tells us which blocks are free; single-use
+ * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
+ * supports storing multiple entries covering a given block we could
+ * theoretically dispense with the refcntbt and simply count rmaps, but
+ * that's inefficient in the (hot) write path, so we'll take the cost of
+ * the extra tree to save time. Also there's no guarantee that rmap
+ * will be enabled.
+ *
+ * Given an array of rmaps sorted by physical block number, a starting
+ * physical block (sp), a bag to hold rmaps that cover sp, and the next
+ * physical block where the level changes (np), we can reconstruct the
+ * rt refcount btree as follows:
+ *
+ * While there are still unprocessed rmaps in the array,
+ * - Set sp to the physical block (pblk) of the next unprocessed rmap.
+ * - Add to the bag all rmaps in the array where startblock == sp.
+ * - Set np to the physical block where the bag size will change. This
+ * is the minimum of (the pblk of the next unprocessed rmap) and
+ * (startblock + len of each rmap in the bag).
+ * - Record the bag size as old_bag_size.
+ *
+ * - While the bag isn't empty,
+ * - Remove from the bag all rmaps where startblock + len == np.
+ * - Add to the bag all rmaps in the array where startblock == np.
+ * - If the bag size isn't old_bag_size, store the refcount entry
+ * (sp, np - sp, bag_size) in the refcnt btree.
+ * - If the bag is empty, break out of the inner loop.
+ * - Set old_bag_size to the bag size
+ * - Set sp = np.
+ * - Set np to the physical block where the bag size will change.
+ * This is the minimum of (the pblk of the next unprocessed rmap)
+ * and (startblock + len of each rmap in the bag).
+ *
+ * Like all the other repairers, we make a list of all the refcount
+ * records we need, then reinitialize the rt refcount btree root and
+ * insert all the records.
+ */
+
+/* The only parts of the rmap that we care about for computing refcounts. */
+struct xrep_rtrefc_rmap {
+ xfs_rtblock_t startblock;
+ xfs_filblks_t blockcount;
+} __packed;
+
+struct xrep_rtrefc {
+ /* refcount extents */
+ struct xfarray *refcount_records;
+
+ /* new refcountbt information */
+ struct xrep_newbt new_btree_info;
+ struct xfs_btree_bload rtrefc_bload;
+
+ /* old refcountbt blocks */
+ struct xbitmap old_rtrefcountbt_blocks;
+
+ struct xfs_scrub *sc;
+
+ /* # of refcountbt blocks */
+ xfs_filblks_t btblocks;
+
+ /* get_record()'s position in the free space record array. */
+ uint64_t iter;
+};
+
+/* Check for any obvious conflicts with this shared/CoW staging extent. */
+STATIC int
+xrep_rtrefc_check_ext(
+ struct xfs_scrub *sc,
+ const struct xfs_refcount_irec *rec)
+{
+ xfs_rtblock_t rtbno = rec->rc_startblock;
+
+ if (rtbno >= XFS_RTREFC_COW_START)
+ rtbno -= XFS_RTREFC_COW_START;
+
+ /* Must be within the rt device. */
+ if (!xfs_verify_rtext(sc->mp, rtbno, rec->rc_blockcount))
+ return -EFSCORRUPTED;
+
+ /* Make sure this isn't free space or misaligned. */
+ return xrep_require_rtext_inuse(sc, rtbno, rec->rc_blockcount, true);
+}
+
+/* Record a reference count extent. */
+STATIC int
+xrep_rtrefc_stash(
+ struct xrep_rtrefc *rr,
+ xfs_rtblock_t bno,
+ xfs_filblks_t len,
+ xfs_nlink_t refcount)
+{
+ struct xfs_refcount_irec irec = {
+ .rc_startblock = bno,
+ .rc_blockcount = len,
+ .rc_refcount = refcount,
+ };
+ struct xfs_mount *mp = rr->sc->mp;
+ int error = 0;
+
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+ error = xrep_rtrefc_check_ext(rr->sc, &irec);
+ if (error)
+ return error;
+
+ trace_xrep_rtrefc_found(mp, &irec);
+
+ return xfarray_append(rr->refcount_records, &irec);
+}
+
+/* Record a CoW staging extent. */
+STATIC int
+xrep_rtrefc_stash_cow(
+ struct xrep_rtrefc *rr,
+ xfs_rtblock_t bno,
+ xfs_filblks_t len)
+{
+ return xrep_rtrefc_stash(rr, bno + XFS_RTREFC_COW_START, len, 1);
+}
+
+/* Decide if an rmap could describe a shared extent. */
+static inline bool
+xrep_rtrefc_rmap_shareable(
+ const struct xfs_rmap_irec *rmap)
+{
+ /* rt metadata are never sharable */
+ if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
+ return false;
+
+ /* Unwritten file blocks are not shareable. */
+ if (rmap->rm_flags & XFS_RMAP_UNWRITTEN)
+ return false;
+
+ return true;
+}
+
+/* Grab the next (abbreviated) rmap record from the rmapbt. */
+STATIC int
+xrep_rtrefc_walk_rmaps(
+ struct xrep_rtrefc *rr,
+ struct xrep_rtrefc_rmap *rrm,
+ bool *have_rec)
+{
+ struct xfs_rmap_irec rmap;
+ struct xfs_btree_cur *cur = rr->sc->sr.rmap_cur;
+ struct xfs_mount *mp = cur->bc_mp;
+ int have_gt;
+ int error = 0;
+
+ *have_rec = false;
+
+ /*
+ * Loop through the remaining rmaps. Remember CoW staging
+ * extents and the refcountbt blocks from the old tree for later
+ * disposal. We can only share written data fork extents, so
+ * keep looping until we find an rmap for one.
+ */
+ do {
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+ error = xfs_btree_increment(cur, 0, &have_gt);
+ if (error)
+ return error;
+ if (!have_gt)
+ return 0;
+
+ error = xfs_rmap_get_rec(cur, &rmap, &have_gt);
+ if (error)
+ return error;
+ if (XFS_IS_CORRUPT(mp, !have_gt)) {
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
+ }
+
+ if (rmap.rm_owner == XFS_RMAP_OWN_COW) {
+ error = xrep_rtrefc_stash_cow(rr, rmap.rm_startblock,
+ rmap.rm_blockcount);
+ if (error)
+ return error;
+ } else if (xfs_internal_inum(mp, rmap.rm_owner) ||
+ (rmap.rm_flags & (XFS_RMAP_ATTR_FORK |
+ XFS_RMAP_BMBT_BLOCK))) {
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
+ }
+
+ } while (!xrep_rtrefc_rmap_shareable(&rmap));
+
+ rrm->startblock = rmap.rm_startblock;
+ rrm->blockcount = rmap.rm_blockcount;
+ *have_rec = true;
+ return 0;
+}
+
+/* Compare two btree extents. */
+static int
+xrep_rtrefc_extent_cmp(
+ const void *a,
+ const void *b)
+{
+ const struct xfs_refcount_irec *ap = a;
+ const struct xfs_refcount_irec *bp = b;
+
+ if (ap->rc_startblock > bp->rc_startblock)
+ return 1;
+ else if (ap->rc_startblock < bp->rc_startblock)
+ return -1;
+ return 0;
+}
+
+#define RRM_NEXT(r) ((r).startblock + (r).blockcount)
+/*
+ * Find the next block where the refcount changes, given the next rmap we
+ * looked at and the ones we're already tracking.
+ */
+static inline int
+xrep_rtrefc_next_edge(
+ struct xfarray *rmap_bag,
+ struct xrep_rtrefc_rmap *next_rrm,
+ bool next_valid,
+ xfs_rtblock_t *nbnop)
+{
+ struct xrep_rtrefc_rmap rrm;
+ uint64_t i = 0;
+ xfs_rtblock_t nbno = NULLFSBLOCK;
+ int error;
+
+ if (next_valid)
+ nbno = next_rrm->startblock;
+
+ while ((error = xfarray_iter(rmap_bag, &i, &rrm)) == 1)
+ nbno = min_t(xfs_rtblock_t, nbno, RRM_NEXT(rrm));
+
+ if (error)
+ return error;
+
+ /*
+ * We should have found /something/ because either next_rrm is the next
+ * interesting rmap to look at after emitting this refcount extent, or
+ * there are other rmaps in rmap_bag contributing to the current
+ * sharing count. But if something is seriously wrong, bail out.
+ */
+ if (nbno == NULLFSBLOCK)
+ return -EFSCORRUPTED;
+
+ *nbnop = nbno;
+ return 0;
+}
+
+/* Record extents that belong to the realtime refcount inode. */
+STATIC int
+xrep_rtrefc_walk_rmap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_rtrefc *rr = priv;
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_fsblock_t fsbno;
+ int error = 0;
+
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+ /* Skip extents which are not owned by this inode and fork. */
+ if (rec->rm_owner != mp->m_rrefcountip->i_ino)
+ return 0;
+
+ error = xrep_check_ino_btree_mapping(rr->sc, rec);
+ if (error)
+ return error;
+
+ fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
+ rec->rm_startblock);
+
+ return xbitmap_set(&rr->old_rtrefcountbt_blocks, fsbno,
+ rec->rm_blockcount);
+}
+
+/*
+ * Walk forward through the rmap btree to collect all rmaps starting at
+ * @bno in @rmap_bag. These represent the file(s) that share ownership of
+ * the current block. Upon return, the rmap cursor points to the last record
+ * satisfying the startblock constraint.
+ */
+static int
+xrep_rtrefc_push_rmaps_at(
+ struct xrep_rtrefc *rr,
+ struct xfarray *rmap_bag,
+ xfs_fsblock_t bno,
+ struct xrep_rtrefc_rmap *rrm,
+ bool *have,
+ size_t *stack_sz)
+{
+ struct xfs_scrub *sc = rr->sc;
+ int have_gt;
+ int error;
+
+ while (*have && rrm->startblock == bno) {
+ error = xfarray_store_anywhere(rmap_bag, rrm);
+ if (error)
+ return error;
+ (*stack_sz)++;
+ error = xrep_rtrefc_walk_rmaps(rr, rrm, have);
+ if (error)
+ return error;
+ }
+
+ error = xfs_btree_decrement(sc->sr.rmap_cur, 0, &have_gt);
+ if (error)
+ return error;
+ if (XFS_IS_CORRUPT(sc->mp, !have_gt)) {
+ xfs_btree_mark_sick(sc->sr.rmap_cur);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+}
+
+/* Scan one AG for reverse mappings for the realtime refcount btree. */
+STATIC int
+xrep_rtrefc_scan_ag(
+ struct xrep_rtrefc *rr,
+ struct xfs_perag *pag)
+{
+ struct xfs_scrub *sc = rr->sc;
+ int error;
+
+ error = xrep_ag_init(sc, pag, &sc->sa);
+ if (error)
+ return error;
+
+ error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrefc_walk_rmap, rr);
+ xchk_ag_free(sc, &sc->sa);
+ return error;
+}
+
+/* Iterate all the rmap records to generate reference count data. */
+STATIC int
+xrep_rtrefc_find_refcounts(
+ struct xrep_rtrefc *rr)
+{
+ struct xrep_rtrefc_rmap rrm;
+ struct xfs_scrub *sc = rr->sc;
+ struct xfarray *rmap_bag;
+ struct xfs_perag *pag;
+ xfs_rtblock_t sbno;
+ xfs_rtblock_t cbno;
+ xfs_rtblock_t nbno;
+ size_t old_stack_sz;
+ size_t stack_sz = 0;
+ xfs_agnumber_t agno;
+ bool have;
+ int error;
+
+ /* Scan for old rtrefc btree blocks. */
+ for_each_perag(sc->mp, agno, pag) {
+ error = xrep_rtrefc_scan_ag(rr, pag);
+ if (error) {
+ xfs_perag_put(pag);
+ return error;
+ }
+ }
+
+ xrep_rt_btcur_init(sc, &sc->sr);
+
+ /* Set up some storage */
+ error = xfarray_create(sc->mp, "rtrmap bag",
+ sizeof(struct xrep_rtrefc_rmap), &rmap_bag);
+ if (error)
+ goto out_cur;
+
+ /* Start the rtrmapbt cursor to the left of all records. */
+ error = xfs_btree_goto_left_edge(sc->sr.rmap_cur);
+ if (error)
+ goto out_bag;
+
+ /* Process reverse mappings into refcount data. */
+ while (xfs_btree_has_more_records(sc->sr.rmap_cur)) {
+ /* Push all rmaps with pblk == sbno onto the stack */
+ error = xrep_rtrefc_walk_rmaps(rr, &rrm, &have);
+ if (error)
+ goto out_bag;
+ if (!have)
+ break;
+ sbno = cbno = rrm.startblock;
+ error = xrep_rtrefc_push_rmaps_at(rr, rmap_bag, sbno,
+ &rrm, &have, &stack_sz);
+ if (error)
+ goto out_bag;
+
+ /* Set nbno to the bno of the next refcount change */
+ error = xrep_rtrefc_next_edge(rmap_bag, &rrm, have, &nbno);
+ if (error)
+ goto out_bag;
+
+ ASSERT(nbno > sbno);
+ old_stack_sz = stack_sz;
+
+ /* While stack isn't empty... */
+ while (stack_sz) {
+ uint64_t i = 0;
+
+ /* Pop all rmaps that end at nbno */
+ while ((error = xfarray_iter(rmap_bag, &i,
+ &rrm)) == 1) {
+ if (RRM_NEXT(rrm) != nbno)
+ continue;
+ error = xfarray_nullify(rmap_bag, i - 1);
+ if (error)
+ goto out_bag;
+ stack_sz--;
+ }
+ if (error)
+ goto out_bag;
+
+ /* Push array items that start at nbno */
+ error = xrep_rtrefc_walk_rmaps(rr, &rrm, &have);
+ if (error)
+ goto out_bag;
+ if (have) {
+ error = xrep_rtrefc_push_rmaps_at(rr, rmap_bag,
+ nbno, &rrm, &have, &stack_sz);
+ if (error)
+ goto out_bag;
+ }
+
+ /* Emit refcount if necessary */
+ ASSERT(nbno > cbno);
+ if (stack_sz != old_stack_sz) {
+ if (old_stack_sz > 1) {
+ error = xrep_rtrefc_stash(rr, cbno,
+ nbno - cbno,
+ old_stack_sz);
+ if (error)
+ goto out_bag;
+ }
+ cbno = nbno;
+ }
+
+ /* Stack empty, go find the next rmap */
+ if (stack_sz == 0)
+ break;
+ old_stack_sz = stack_sz;
+ sbno = nbno;
+
+ /* Set nbno to the bno of the next refcount change */
+ error = xrep_rtrefc_next_edge(rmap_bag, &rrm, have,
+ &nbno);
+ if (error)
+ goto out_bag;
+
+ ASSERT(nbno > sbno);
+ }
+ }
+
+ ASSERT(stack_sz == 0);
+out_bag:
+ xfarray_destroy(rmap_bag);
+out_cur:
+ xchk_rt_btcur_free(&sc->sr);
+ return error;
+}
+#undef RRM_NEXT
+
+/* Retrieve refcountbt data for bulk load. */
+STATIC int
+xrep_rtrefc_get_record(
+ struct xfs_btree_cur *cur,
+ void *priv)
+{
+ struct xrep_rtrefc *rr = priv;
+
+ return xfarray_load_next(rr->refcount_records, &rr->iter,
+ &cur->bc_rec.rc);
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+STATIC int
+xrep_rtrefc_claim_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ void *priv)
+{
+ struct xrep_rtrefc *rr = priv;
+ int error;
+
+ error = xrep_newbt_relog_efis(&rr->new_btree_info);
+ if (error)
+ return error;
+
+ return xrep_newbt_claim_block(cur, &rr->new_btree_info, ptr);
+}
+
+/* Figure out how much space we need to create the incore btree root block. */
+STATIC size_t
+xrep_rtrefc_iroot_size(
+ struct xfs_btree_cur *cur,
+ unsigned int level,
+ unsigned int nr_this_level,
+ void *priv)
+{
+ return xfs_rtrefcount_broot_space_calc(cur->bc_mp, level,
+ nr_this_level);
+}
+
+/*
+ * Use the collected refcount information to stage a new rt refcount btree. If
+ * this is successful we'll return with the new btree root information logged
+ * to the repair transaction but not yet committed.
+ */
+STATIC int
+xrep_rtrefc_build_new_tree(
+ struct xrep_rtrefc *rr)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_btree_cur *refc_cur;
+ int error;
+
+ rr->rtrefc_bload.get_record = xrep_rtrefc_get_record;
+ rr->rtrefc_bload.claim_block = xrep_rtrefc_claim_block;
+ rr->rtrefc_bload.iroot_size = xrep_rtrefc_iroot_size;
+ xrep_bload_estimate_slack(sc, &rr->rtrefc_bload);
+
+ /*
+ * Sort the refcount extents by startblock or else the btree records
+ * will be in the wrong order.
+ */
+ error = xfarray_sort(rr->refcount_records, xrep_rtrefc_extent_cmp);
+ if (error)
+ return error;
+
+ /*
+ * Prepare to construct the new btree by reserving disk space for the
+ * new btree and setting up all the accounting information we'll need
+ * to root the new btree while it's under construction and before we
+ * attach it to the realtime refcount inode.
+ */
+ xfs_rmap_ino_bmbt_owner(&oinfo, mp->m_rrefcountip->i_ino,
+ XFS_DATA_FORK);
+ xrep_newbt_init_inode(&rr->new_btree_info, sc, XFS_DATA_FORK, &oinfo);
+ refc_cur = xfs_rtrefcountbt_stage_cursor(mp, mp->m_rrefcountip,
+ &rr->new_btree_info.ifake);
+
+ /* Compute how many blocks we'll need. */
+ error = xfs_btree_bload_compute_geometry(refc_cur, &rr->rtrefc_bload,
+ xfarray_length(rr->refcount_records));
+ if (error)
+ goto err_cur;
+
+ /* Last chance to abort before we start committing fixes. */
+ if (xchk_should_terminate(sc, &error))
+ goto err_cur;
+
+ /*
+ * Guess how many blocks we're going to need to rebuild an entire
+ * rtrefcountbt from the number of extents we found, and pump up our
+ * transaction to have sufficient block reservation.
+ */
+ error = xfs_trans_reserve_more_inode(sc->tp, mp->m_rrefcountip,
+ rr->rtrefc_bload.nr_blocks, 0);
+ if (error)
+ goto err_cur;
+
+ /* Reserve the space we'll need for the new btree. */
+ error = xrep_newbt_alloc_blocks(&rr->new_btree_info,
+ rr->rtrefc_bload.nr_blocks);
+ if (error)
+ goto err_cur;
+
+ /* Add all observed refcount records. */
+ rr->new_btree_info.ifake.if_fork->if_format = XFS_DINODE_FMT_REFCOUNT;
+ rr->iter = 0;
+ error = xfs_btree_bload(refc_cur, &rr->rtrefc_bload, rr);
+ if (error)
+ goto err_cur;
+
+ /*
+ * Install the new rtrefc btree in the inode. After this point the old
+ * btree is no longer accessible, the new tree is live, and we can
+ * delete the cursor.
+ */
+ xfs_rtrefcountbt_commit_staged_btree(refc_cur, sc->tp);
+ xrep_inode_set_nblocks(rr->sc, rr->new_btree_info.ifake.if_blocks);
+ xfs_btree_del_cursor(refc_cur, 0);
+
+ /* Dispose of any unused blocks and the accounting information. */
+ xrep_newbt_destroy(&rr->new_btree_info, error);
+ return xrep_roll_trans(sc);
+err_cur:
+ xfs_btree_del_cursor(refc_cur, error);
+ xrep_newbt_destroy(&rr->new_btree_info, error);
+ return error;
+}
+
+/*
+ * Now that we've logged the roots of the new btrees, invalidate all of the
+ * old blocks and free them.
+ */
+STATIC int
+xrep_rtrefc_remove_old_tree(
+ struct xrep_rtrefc *rr)
+{
+ struct xfs_owner_info oinfo;
+ int error;
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, rr->sc->ip->i_ino, XFS_DATA_FORK);
+
+ /*
+ * Free all the extents that were allocated to the former rtrefcountbt
+ * and aren't cross-linked with something else. If the incore space
+ * reservation for the rtrmap inode is insufficient, this will refill
+ * it.
+ */
+ error = xrep_reap_extents(rr->sc, &rr->old_rtrefcountbt_blocks, &oinfo,
+ XFS_AG_RESV_IMETA);
+ if (error)
+ return error;
+
+ /*
+ * Ensure the proper reservation for the rtrefcount inode so that we
+ * don't fail to expand the btree.
+ */
+ return xrep_reset_imeta_reservation(rr->sc);
+}
+
+/* Rebuild the rt refcount btree. */
+int
+xrep_rtrefcountbt(
+ struct xfs_scrub *sc)
+{
+ struct xrep_rtrefc *rr;
+ struct xfs_mount *mp = sc->mp;
+ int error;
+
+ /* We require the rmapbt to rebuild anything. */
+ if (!xfs_has_rtrmapbt(mp))
+ return -EOPNOTSUPP;
+
+ rr = kmem_zalloc(sizeof(struct xrep_rtrefc), KM_NOFS | KM_MAYFAIL);
+ if (!rr)
+ return -ENOMEM;
+ rr->sc = sc;
+
+ /* Set up some storage */
+ error = xfarray_create(sc->mp, "rtrefcount records",
+ sizeof(struct xfs_refcount_irec),
+ &rr->refcount_records);
+ if (error)
+ goto out_rr;
+
+ /* Collect all reference counts. */
+ xbitmap_init(&rr->old_rtrefcountbt_blocks);
+ error = xrep_rtrefc_find_refcounts(rr);
+ if (error)
+ goto out_bitmap;
+
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ error = xrep_ino_dqattach(sc);
+ if (error)
+ goto out_bitmap;
+
+ /* Rebuild the refcount information. */
+ error = xrep_rtrefc_build_new_tree(rr);
+ if (error)
+ goto out_bitmap;
+
+ /* Kill the old tree. */
+ error = xrep_rtrefc_remove_old_tree(rr);
+
+out_bitmap:
+ xbitmap_destroy(&rr->old_rtrefcountbt_blocks);
+ xfarray_destroy(rr->refcount_records);
+out_rr:
+ kmem_free(rr);
+ return error;
+}
diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c
index e0f83af104de..03a28e733877 100644
--- a/fs/xfs/scrub/rtrmap_repair.c
+++ b/fs/xfs/scrub/rtrmap_repair.c
@@ -153,7 +153,7 @@ xrep_rtrmap_check_mapping(
/* Make sure this isn't free space. */
return xrep_require_rtext_inuse(sc, rec->rm_startblock,
- rec->rm_blockcount);
+ rec->rm_blockcount, false);
}
/* Store a reverse-mapping record. */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 160480548461..0c5f8b0995b6 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -390,7 +390,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.setup = xchk_setup_rtrefcountbt,
.scrub = xchk_rtrefcountbt,
.has = xfs_has_rtreflink,
- .repair = xrep_notsupported,
+ .repair = xrep_rtrefcountbt,
},
};
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index bca2d61dc1ad..d5901bbf4480 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1969,6 +1969,31 @@ TRACE_EVENT(xrep_rtrmap_found,
__entry->flags)
);
+TRACE_EVENT(xrep_rtrefc_found,
+ TP_PROTO(struct xfs_mount *mp, const struct xfs_refcount_irec *rec),
+ TP_ARGS(mp, rec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_fsblock_t, startblock)
+ __field(xfs_filblks_t, blockcount)
+ __field(xfs_nlink_t, refcount)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->startblock = rec->rc_startblock;
+ __entry->blockcount = rec->rc_blockcount;
+ __entry->refcount = rec->rc_refcount;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d rtbno 0x%llx fsbcount 0x%llx refcount %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->startblock,
+ __entry->blockcount,
+ __entry->refcount)
+)
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */