summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Darrick J. Wong <djwong@kernel.org> 2021-09-29 18:43:26 -0700
committer Darrick J. Wong <djwong@kernel.org> 2021-10-22 16:41:16 -0700
commit 89b052a624167d8f8d39c706669e3b0ae6107173 (patch)
tree 81fcce35b30186cc68a7ca93e735d16108ea7b9f
parent c0f26249c5af3ed541cab86313f6ebaf62e7c9a3 (diff)
xfs: hook live rmap operations during a repair operation
Hook the regular rmap code when an rmapbt repair operation is running so that we can unlock the AGF buffer to scan the filesystem and keep the in-memory btree up to date during the scan.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- fs/xfs/libxfs/xfs_ag.c 1
-rw-r--r-- fs/xfs/libxfs/xfs_ag.h 3
-rw-r--r-- fs/xfs/libxfs/xfs_rmap.c 82
-rw-r--r-- fs/xfs/libxfs/xfs_rmap.h 16
-rw-r--r-- fs/xfs/scrub/rmap_repair.c 129
-rw-r--r-- fs/xfs/scrub/trace.c 1
-rw-r--r-- fs/xfs/scrub/trace.h 47
7 files changed, 258 insertions, 21 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index b6a9f6dde55a..9100cf7417b9 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -263,6 +263,7 @@ xfs_initialize_perag(
init_waitqueue_head(&pag->pagb_wait);
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
+ xfs_hook_init(&pag->pag_rmap_update_hooks);
error = xfs_buf_hash_init(pag);
if (error)
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index cfc51a5af74a..cc52f628acb3 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -114,6 +114,9 @@ struct xfs_perag {
atomic_t pag_intents;
wait_queue_head_t pag_intents_wq;
#endif
+
+ /* online rmap repair stuff */
+ struct xfs_hook_chain pag_rmap_update_hooks;
};
int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 0f21934ddcbf..a6be6c7fe6e5 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -805,6 +805,33 @@ out_error:
return error;
}
+/* Call a hook to capture deferred rmapbt updates in real time. */
+#ifdef CONFIG_XFS_LIVE_HOOKS
+static inline void
+xfs_rmap_update_hook(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ enum xfs_rmap_intent_type op,
+ xfs_fsblock_t startblock,
+ xfs_filblks_t blockcount,
+ bool unwritten,
+ const struct xfs_owner_info *oinfo)
+{
+ struct xfs_rmap_update_params p = {
+ .tp = tp,
+ .startblock = startblock,
+ .blockcount = blockcount,
+ .unwritten = unwritten,
+ .oinfo = *oinfo, /* struct copy */
+ };
+
+ if (pag)
+ xfs_hook_call(&pag->pag_rmap_update_hooks, op, &p);
+}
+#else
+# define xfs_rmap_update_hook(t, p, o, s, b, u, oi) do { } while(0)
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
/*
* Remove a reference to an extent in the rmap btree.
*/
@@ -825,7 +852,7 @@ xfs_rmap_free(
return 0;
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
-
+ xfs_rmap_update_hook(tp, pag, XFS_RMAP_UNMAP, bno, len, false, oinfo);
error = xfs_rmap_unmap(cur, bno, len, false, oinfo);
xfs_btree_del_cursor(cur, error);
@@ -1069,6 +1096,7 @@ xfs_rmap_alloc(
return 0;
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
+ xfs_rmap_update_hook(tp, pag, XFS_RMAP_MAP, bno, len, false, oinfo);
error = xfs_rmap_map(cur, bno, len, false, oinfo);
xfs_btree_del_cursor(cur, error);
@@ -2555,40 +2583,60 @@ xfs_rmap_finish_one(
ri->ri_bmap.br_startoff);
unwritten = ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN;
- switch (ri->ri_type) {
+ xfs_rmap_update_hook(tp, pag, ri->ri_type, bno,
+ ri->ri_bmap.br_blockcount, unwritten, &oinfo);
+ error = __xfs_rmap_finish_intent(rcur, ri->ri_type, bno,
+ ri->ri_bmap.br_blockcount, &oinfo, unwritten);
+out_drop:
+ if (pag)
+ xfs_perag_put(pag);
+ return error;
+}
+
+/* Complete an rmap operation. */
+int
+__xfs_rmap_finish_intent(
+ struct xfs_btree_cur *rcur,
+ enum xfs_rmap_intent_type op,
+ xfs_fsblock_t startblock,
+ xfs_filblks_t blockcount,
+ const struct xfs_owner_info *oinfo,
+ bool unwritten)
+{
+ int error;
+
+ switch (op) {
case XFS_RMAP_ALLOC:
case XFS_RMAP_MAP:
- error = xfs_rmap_map(rcur, bno, ri->ri_bmap.br_blockcount,
- unwritten, &oinfo);
+ error = xfs_rmap_map(rcur, startblock, blockcount, unwritten,
+ oinfo);
break;
case XFS_RMAP_MAP_SHARED:
- error = xfs_rmap_map_shared(rcur, bno,
- ri->ri_bmap.br_blockcount, unwritten, &oinfo);
+ error = xfs_rmap_map_shared(rcur, startblock, blockcount,
+ unwritten, oinfo);
break;
case XFS_RMAP_FREE:
case XFS_RMAP_UNMAP:
- error = xfs_rmap_unmap(rcur, bno, ri->ri_bmap.br_blockcount,
- unwritten, &oinfo);
+ error = xfs_rmap_unmap(rcur, startblock, blockcount, unwritten,
+ oinfo);
break;
case XFS_RMAP_UNMAP_SHARED:
- error = xfs_rmap_unmap_shared(rcur, bno,
- ri->ri_bmap.br_blockcount, unwritten, &oinfo);
+ error = xfs_rmap_unmap_shared(rcur, startblock, blockcount,
+ unwritten, oinfo);
break;
case XFS_RMAP_CONVERT:
- error = xfs_rmap_convert(rcur, bno, ri->ri_bmap.br_blockcount,
- !unwritten, &oinfo);
+ error = xfs_rmap_convert(rcur, startblock, blockcount,
+ !unwritten, oinfo);
break;
case XFS_RMAP_CONVERT_SHARED:
- error = xfs_rmap_convert_shared(rcur, bno,
- ri->ri_bmap.br_blockcount, !unwritten, &oinfo);
+ error = xfs_rmap_convert_shared(rcur, startblock, blockcount,
+ !unwritten, oinfo);
break;
default:
ASSERT(0);
error = -EFSCORRUPTED;
}
-out_drop:
- if (pag)
- xfs_perag_put(pag);
+
return error;
}
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index aaa6e6b94f67..26d6c75c6b8f 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -192,6 +192,10 @@ void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
struct xfs_btree_cur *rcur, int error);
int xfs_rmap_finish_one(struct xfs_trans *tp, struct xfs_rmap_intent *ri,
struct xfs_btree_cur **pcur);
+int __xfs_rmap_finish_intent(struct xfs_btree_cur *rcur,
+ enum xfs_rmap_intent_type op, xfs_fsblock_t startblock,
+ xfs_filblks_t blockcount, const struct xfs_owner_info *oinfo,
+ bool unwritten);
int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_fsblock_t bno,
uint64_t owner, uint64_t offset, unsigned int flags,
struct xfs_rmap_irec *irec, int *stat);
@@ -223,4 +227,16 @@ extern const struct xfs_owner_info XFS_RMAP_OINFO_COW;
int xfs_rmap_map_immediate(struct xfs_btree_cur *mcur,
struct xfs_rmap_irec *irec);
+/*
+ * Parameters for tracking reverse mapping changes. The hook function arg
+ * parameter is enum xfs_rmap_intent_type, and the rest is below.
+ */
+struct xfs_rmap_update_params {
+ struct xfs_trans *tp;
+ xfs_fsblock_t startblock;
+ xfs_filblks_t blockcount;
+ struct xfs_owner_info oinfo;
+ bool unwritten;
+};
+
#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c
index aca54cc7c1b0..e22b870c399f 100644
--- a/fs/xfs/scrub/rmap_repair.c
+++ b/fs/xfs/scrub/rmap_repair.c
@@ -151,6 +151,9 @@ struct xrep_rmap {
struct xrep_newbt new_btree_info;
struct xfs_btree_bload rmap_bload;
+ /* lock for the xfbtree and xfile */
+ struct mutex lock;
+
/* rmap records generated from primary metadata */
struct xfbtree *rmap_btree;
/* in-memory btree cursor for the xfs_btree_bload iteration */
@@ -161,6 +164,9 @@ struct xrep_rmap {
/* staged rmap btree cursor */
struct xfs_btree_cur *cur;
+ /* Hooks into rmap update code. */
+ struct notifier_block rmap_update_hook;
+
/* inode scan cursor */
struct xchk_iscan iscan;
@@ -234,12 +240,16 @@ xrep_rmap_stash(
if (xchk_should_terminate(sc, &error))
return error;
+ if (xchk_iscan_aborted(&rr->iscan))
+ return -EFSCORRUPTED;
+
trace_xrep_rmap_found(sc->mp, sc->sa.pag->pag_agno, &rmap);
+ mutex_lock(&rr->lock);
error = xfs_btree_mem_head_read_buf(rr->rmap_btree->target, sc->tp,
&mhead_bp);
if (error)
- return error;
+ goto out_abort;
mcur = xfs_rmapbt_mem_cursor(sc->mp, sc->tp, mhead_bp, rr->rmap_btree);
error = xfs_rmap_map_immediate(mcur, &rmap);
@@ -247,10 +257,18 @@ xrep_rmap_stash(
if (error)
goto out_cancel;
- return xfbtree_trans_commit(rr->rmap_btree, sc->tp);
+ error = xfbtree_trans_commit(rr->rmap_btree, sc->tp);
+ if (error)
+ goto out_abort;
+
+ mutex_unlock(&rr->lock);
+ return 0;
out_cancel:
xfbtree_trans_cancel(rr->rmap_btree, sc->tp);
+out_abort:
+ xchk_iscan_abort(&rr->iscan);
+ mutex_unlock(&rr->lock);
return error;
}
@@ -969,6 +987,13 @@ end_agscan:
return error;
/*
+ * If a hook failed to update the in-memory btree, we lack the data to
+ * continue the repair.
+ */
+ if (xchk_iscan_aborted(&rr->iscan))
+ return -EFSCORRUPTED;
+
+ /*
* Now that we have everything locked again, we need to count the
* number of rmap records stashed in the btree. This should reflect
* all actively-owned space in the filesystem. At the same time, check
@@ -1542,6 +1567,86 @@ out_bitmap:
return error;
}
+static inline bool
+xrep_rmapbt_want_live_update(
+ struct xchk_iscan *iscan,
+ const struct xfs_owner_info *oi)
+{
+ /*
+ * Before unlocking the AG header to perform the inode scan, we
+ * recorded reverse mappings for all AG metadata except for the OWN_AG
+ * metadata. IOWs, the in-memory btree knows about the AG headers, the
+ * two inode btrees, the CoW staging extents, and the refcount btrees.
+ * For these types of metadata, we need to record the live updates in
+ * the in-memory rmap btree.
+ *
+ * However, we do not scan the free space btrees or the AGFL until we
+ * have re-locked the AGF and are ready to reserve space for the new
+ * rmap btree, so we do not want live updates for OWN_AG metadata.
+ */
+ if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
+ return oi->oi_owner != XFS_RMAP_OWN_AG;
+
+ /* Ignore updates to files that the scanner hasn't visited yet. */
+ return xchk_iscan_want_live_update(iscan, oi->oi_owner);
+}
+
+/*
+ * Apply an rmapbt update from the regular filesystem into our shadow btree.
+ * We're running from the thread that owns the AGF buffer and is generating
+ * the update, so we must be careful about which parts of struct xrep_rmap
+ * we change.
+ */
+static int
+xrep_rmapbt_live_update(
+ struct notifier_block *nb,
+ unsigned long op,
+ void *data)
+{
+ struct xfs_rmap_update_params *p = data;
+ struct xrep_rmap *rr;
+ struct xfs_mount *mp;
+ struct xfs_btree_cur *mcur;
+ struct xfs_buf *mhead_bp;
+ int error;
+
+ rr = container_of(nb, struct xrep_rmap, rmap_update_hook);
+ mp = rr->sc->mp;
+
+ if (!xrep_rmapbt_want_live_update(&rr->iscan, &p->oinfo))
+ goto out_unlock;
+
+ trace_xrep_rmap_live_update(mp, rr->sc->sa.pag->pag_agno, op, p);
+
+ mutex_lock(&rr->lock);
+ error = xfs_btree_mem_head_read_buf(rr->rmap_btree->target, p->tp,
+ &mhead_bp);
+ if (error)
+ goto out_abort;
+
+ mcur = xfs_rmapbt_mem_cursor(mp, p->tp, mhead_bp, rr->rmap_btree);
+ error = __xfs_rmap_finish_intent(mcur, op, p->startblock,
+ p->blockcount, &p->oinfo, p->unwritten);
+ xfs_btree_del_cursor(mcur, error);
+ if (error)
+ goto out_cancel;
+
+ error = xfbtree_trans_commit(rr->rmap_btree, p->tp);
+ if (error)
+ goto out_abort;
+
+ mutex_unlock(&rr->lock);
+ return NOTIFY_DONE;
+
+out_cancel:
+ xfbtree_trans_cancel(rr->rmap_btree, p->tp);
+out_abort:
+ mutex_unlock(&rr->lock);
+ xchk_iscan_abort(&rr->iscan);
+out_unlock:
+ return NOTIFY_DONE;
+}
+
/* Repair the rmap btree for some AG. */
int
xrep_rmapbt(
@@ -1554,6 +1659,7 @@ xrep_rmapbt(
if (!rr)
return -ENOMEM;
rr->sc = sc;
+ mutex_init(&rr->lock);
/* Set up in-memory rmap btree */
rr->rmap_btree = xfs_rmapbt_mem_create(sc->mp, "rmap btree");
@@ -1566,25 +1672,40 @@ xrep_rmapbt(
xchk_iscan_start(&rr->iscan);
/*
+ * Hook into live rmap operations so that we can update our in-memory
+ * btree to reflect live changes on the filesystem. Since we drop the
+ * AGF buffer to scan all the inodes, we need this piece to avoid
+ * installing a stale btree.
+ */
+ error = xfs_hook_add(&sc->sa.pag->pag_rmap_update_hooks,
+ &rr->rmap_update_hook, xrep_rmapbt_live_update);
+ if (error)
+ goto out_records;
+
+ /*
* Collect rmaps for everything in this AG that isn't space metadata.
* These rmaps won't change even as we try to allocate blocks.
*/
error = xrep_rmap_find_rmaps(rr);
if (error)
- goto out_records;
+ goto out_abort;
/* Rebuild the rmap information. */
error = xrep_rmap_build_new_tree(rr);
if (error)
- goto out_records;
+ goto out_abort;
/* Kill the old tree. */
error = xrep_rmap_remove_old_tree(rr);
+out_abort:
+ xchk_iscan_abort(&rr->iscan);
+ xfs_hook_del(&sc->sa.pag->pag_rmap_update_hooks, &rr->rmap_update_hook);
out_records:
xchk_iscan_finish(&rr->iscan);
xfbtree_destroy(rr->rmap_btree);
out_rr:
+ mutex_destroy(&rr->lock);
kmem_free(rr);
return error;
}
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 1dd13c488a26..dc49e4176ff0 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -16,6 +16,7 @@
#include "xfs_ag.h"
#include "xfs_quota_defs.h"
#include "xfs_da_format.h"
+#include "xfs_rmap.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index fc259c34e8b0..8a7c31e3855b 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -23,6 +23,7 @@ struct xfarray;
struct xchk_iscan;
struct xchk_nlink;
struct xfbtree;
+struct xfs_rmap_update_params;
/*
* ftrace's __print_symbolic requires that all enum values be wrapped in the
@@ -2205,6 +2206,52 @@ DEFINE_EVENT(xfbtree_freesp_class, name, \
TP_ARGS(xfbt, cur, fileoff))
DEFINE_XFBTREE_FREESP_EVENT(xfbtree_alloc_block);
DEFINE_XFBTREE_FREESP_EVENT(xfbtree_free_block);
+
+TRACE_DEFINE_ENUM(XFS_RMAP_MAP);
+TRACE_DEFINE_ENUM(XFS_RMAP_MAP_SHARED);
+TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP);
+TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP_SHARED);
+TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT);
+TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT_SHARED);
+TRACE_DEFINE_ENUM(XFS_RMAP_ALLOC);
+TRACE_DEFINE_ENUM(XFS_RMAP_FREE);
+
+TRACE_EVENT(xrep_rmap_live_update,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int op,
+ const struct xfs_rmap_update_params *p),
+ TP_ARGS(mp, agno, op, p),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(unsigned int, op)
+ __field(xfs_agblock_t, agbno)
+ __field(xfs_extlen_t, len)
+ __field(uint64_t, owner)
+ __field(uint64_t, offset)
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->op = op;
+ __entry->agbno = p->startblock;
+ __entry->len = p->blockcount;
+ xfs_owner_info_unpack(&p->oinfo, &__entry->owner,
+ &__entry->offset, &__entry->flags);
+ if (p->unwritten)
+ __entry->flags |= XFS_RMAP_UNWRITTEN;
+ ),
+ TP_printk("dev %d:%d agno 0x%x op %s agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __print_symbolic(__entry->op, XFS_RMAP_INTENT_STRINGS),
+ __entry->agbno,
+ __entry->len,
+ __entry->owner,
+ __entry->offset,
+ __entry->flags)
+);
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */