From b2040cb16f3e80615c52095d32b5a1a5f0eb3919 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Jul 2022 11:06:34 -0700 Subject: xfs: hook live rmap operations during a repair operation Hook the regular rmap code when an rmapbt repair operation is running so that we can unlock the AGF buffer to scan the filesystem and keep the in-memory btree up to date during the scan. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/common.c | 3 + fs/xfs/scrub/repair.c | 36 +++++++++++ fs/xfs/scrub/repair.h | 4 ++ fs/xfs/scrub/rmap_repair.c | 146 ++++++++++++++++++++++++++++++++++++++++++--- fs/xfs/scrub/scrub.c | 4 ++ fs/xfs/scrub/scrub.h | 4 +- fs/xfs/scrub/trace.c | 1 + fs/xfs/scrub/trace.h | 47 +++++++++++++++ 8 files changed, 237 insertions(+), 8 deletions(-) (limited to 'fs/xfs/scrub') diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index befd95b6db98..ae168d1bab4a 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -1250,5 +1250,8 @@ xchk_fshooks_enable( if (scrub_fshooks & XCHK_FSHOOKS_NLINKS) xfs_nlink_hook_enable(); + if (scrub_fshooks & XCHK_FSHOOKS_RMAP) + xfs_rmap_hook_enable(); + sc->flags |= scrub_fshooks; } diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 532eebb79605..56422e5bde38 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -1127,3 +1127,39 @@ xrep_metadata_inode_forks( return 0; } + +/* + * Create a dummy transaction for use in a live update hook function. This + * function MUST NOT be called from regular repair code because the current + * process' transaction is saved via the cookie. + */ +int +xrep_trans_alloc_hook_dummy( + struct xfs_mount *mp, + void **cookiep, + struct xfs_trans **tpp) +{ + int error; + + *cookiep = current->journal_info; + current->journal_info = NULL; + + error = xfs_trans_alloc_empty(mp, tpp); + if (!error) + return 0; + + current->journal_info = *cookiep; + *cookiep = NULL; + return error; +} + +/* Cancel a dummy transaction used by a live update hook function. 
*/ +void +xrep_trans_cancel_hook_dummy( + void **cookiep, + struct xfs_trans *tp) +{ + xfs_trans_cancel(tp); + current->journal_info = *cookiep; + *cookiep = NULL; +} diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 93c9fd3e1496..f9ec8f4bfee9 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -129,6 +129,10 @@ int xrep_quotacheck(struct xfs_scrub *sc); int xrep_reinit_pagf(struct xfs_scrub *sc); int xrep_reinit_pagi(struct xfs_scrub *sc); +int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep, + struct xfs_trans **tpp); +void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp); + #else #define xrep_will_attempt(sc) (false) diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c index 0ab1d9d87a89..ee7bc062c352 100644 --- a/fs/xfs/scrub/rmap_repair.c +++ b/fs/xfs/scrub/rmap_repair.c @@ -128,6 +128,8 @@ xrep_setup_ag_rmapbt( { int error; + xchk_fshooks_enable(sc, XCHK_FSHOOKS_RMAP); + error = xfile_create(sc->mp, "rmapbt repair", 0, &sc->xfile); if (error) return error; @@ -144,6 +146,9 @@ struct xrep_rmap { /* new rmapbt information */ struct xrep_newbt new_btree; + /* lock for the xfbtree and xfile */ + struct mutex lock; + /* rmap records generated from primary metadata */ struct xfbtree *rmap_btree; @@ -152,6 +157,9 @@ struct xrep_rmap { /* in-memory btree cursor for the xfs_btree_bload iteration */ struct xfs_btree_cur *mcur; + /* Hooks into rmap update code. 
*/ + struct xfs_rmap_hook hooks; + /* inode scan cursor */ struct xchk_iscan iscan; @@ -213,11 +221,15 @@ xrep_rmap_stash( if (xchk_should_terminate(sc, &error)) return error; + if (xchk_iscan_aborted(&rr->iscan)) + return -EFSCORRUPTED; + trace_xrep_rmap_found(sc->mp, sc->sa.pag->pag_agno, &rmap); + mutex_lock(&rr->lock); error = xfbtree_head_read_buf(rr->rmap_btree, sc->tp, &mhead_bp); if (error) - return error; + goto out_abort; mcur = xfs_rmapbt_mem_cursor(sc->sa.pag, sc->tp, mhead_bp, rr->rmap_btree); @@ -226,10 +238,18 @@ xrep_rmap_stash( if (error) goto out_cancel; - return xfbtree_trans_commit(rr->rmap_btree, sc->tp); + error = xfbtree_trans_commit(rr->rmap_btree, sc->tp); + if (error) + goto out_abort; + + mutex_unlock(&rr->lock); + return 0; out_cancel: xfbtree_trans_cancel(rr->rmap_btree, sc->tp); +out_abort: + xchk_iscan_abort(&rr->iscan); + mutex_unlock(&rr->lock); return error; } @@ -874,6 +894,13 @@ end_agscan: if (error) return error; + /* + * If a hook failed to update the in-memory btree, we lack the data to + * continue the repair. + */ + if (xchk_iscan_aborted(&rr->iscan)) + return -EFSCORRUPTED; + /* * Now that we have everything locked again, we need to count the * number of rmap records stashed in the btree. This should reflect @@ -1470,6 +1497,97 @@ out_bitmap: return error; } +static inline bool +xrep_rmapbt_want_live_update( + struct xchk_iscan *iscan, + const struct xfs_owner_info *oi) +{ + if (xchk_iscan_aborted(iscan)) + return false; + + /* + * Before unlocking the AG header to perform the inode scan, we + * recorded reverse mappings for all AG metadata except for the OWN_AG + * metadata. IOWs, the in-memory btree knows about the AG headers, the + * two inode btrees, the CoW staging extents, and the refcount btrees. + * For these types of metadata, we need to record the live updates in + * the in-memory rmap btree. 
+ * + * However, we do not scan the free space btrees or the AGFL until we + * have re-locked the AGF and are ready to reserve space for the new + * rmap btree, so we do not want live updates for OWN_AG metadata. + */ + if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner)) + return oi->oi_owner != XFS_RMAP_OWN_AG; + + /* Ignore updates to files that the scanner hasn't visited yet. */ + return xchk_iscan_want_live_update(iscan, oi->oi_owner); +} + +/* + * Apply a rmapbt update from the regular filesystem into our shadow btree. + * We're running from the thread that owns the AGF buffer and is generating + * the update, so we must be careful about which parts of the struct xrep_rmap + * that we change. + */ +static int +xrep_rmapbt_live_update( + struct xfs_hook *hook, + unsigned long action, + void *data) +{ + struct xfs_rmap_update_params *p = data; + struct xrep_rmap *rr; + struct xfs_mount *mp; + struct xfs_btree_cur *mcur; + struct xfs_buf *mhead_bp; + struct xfs_trans *tp; + void *txcookie; + int error; + + rr = container_of(hook, struct xrep_rmap, hooks.update_hook); + mp = rr->sc->mp; + + if (!xrep_rmapbt_want_live_update(&rr->iscan, &p->oinfo)) + goto out_unlock; + + trace_xrep_rmap_live_update(mp, rr->sc->sa.pag->pag_agno, action, p); + + error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp); + if (error) + goto out_abort; + + mutex_lock(&rr->lock); + error = xfbtree_head_read_buf(rr->rmap_btree, tp, &mhead_bp); + if (error) + goto out_cancel; + + mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, tp, mhead_bp, + rr->rmap_btree); + error = __xfs_rmap_finish_intent(mcur, action, p->startblock, + p->blockcount, &p->oinfo, p->unwritten); + xfs_btree_del_cursor(mcur, error); + if (error) + goto out_cancel; + + error = xfbtree_trans_commit(rr->rmap_btree, tp); + if (error) + goto out_cancel; + + xrep_trans_cancel_hook_dummy(&txcookie, tp); + mutex_unlock(&rr->lock); + return NOTIFY_DONE; + +out_cancel: + xfbtree_trans_cancel(rr->rmap_btree, tp); + 
xrep_trans_cancel_hook_dummy(&txcookie, tp); + mutex_unlock(&rr->lock); +out_abort: + xchk_iscan_abort(&rr->iscan); +out_unlock: + return NOTIFY_DONE; +} + /* Repair the rmap btree for some AG. */ int xrep_rmapbt( @@ -1478,13 +1596,11 @@ xrep_rmapbt( struct xrep_rmap *rr; int error; - /* Functionality is not yet complete. */ - return xrep_notsupported(sc); - rr = kzalloc(sizeof(struct xrep_rmap), XCHK_GFP_FLAGS); if (!rr) return -ENOMEM; rr->sc = sc; + mutex_init(&rr->lock); /* Set up in-memory rmap btree */ error = xfs_rmapbt_mem_create(sc->mp, sc->sa.pag->pag_agno, @@ -1495,26 +1611,42 @@ xrep_rmapbt( /* Retry iget every tenth of a second for up to 30 seconds. */ xchk_iscan_start(&rr->iscan, 30000, 100); + /* + * Hook into live rmap operations so that we can update our in-memory + * btree to reflect live changes on the filesystem. Since we drop the + * AGF buffer to scan all the inodes, we need this piece to avoid + * installing a stale btree. + */ + ASSERT(sc->flags & XCHK_FSHOOKS_RMAP); + xfs_hook_setup(&rr->hooks.update_hook, xrep_rmapbt_live_update); + error = xfs_rmap_hook_add(sc->sa.pag, &rr->hooks); + if (error) + goto out_records; + /* * Collect rmaps for everything in this AG that isn't space metadata. * These rmaps won't change even as we try to allocate blocks. */ error = xrep_rmap_find_rmaps(rr); if (error) - goto out_records; + goto out_abort; /* Rebuild the rmap information. */ error = xrep_rmap_build_new_tree(rr); if (error) - goto out_records; + goto out_abort; /* Kill the old tree. 
*/ error = xrep_rmap_remove_old_tree(rr); +out_abort: + xchk_iscan_abort(&rr->iscan); + xfs_rmap_hook_del(sc->sa.pag, &rr->hooks); out_records: xchk_iscan_finish(&rr->iscan); xfbtree_destroy(rr->rmap_btree); out_rr: + mutex_destroy(&rr->lock); kfree(rr); return error; } diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index f030311fae2b..c6eb692a0822 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -17,6 +17,7 @@ #include "xfs_scrub.h" #include "xfs_btree.h" #include "xfs_btree_staging.h" +#include "xfs_rmap.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -164,6 +165,9 @@ xchk_fshooks_disable( if (sc->flags & XCHK_FSHOOKS_NLINKS) xfs_nlink_hook_disable(); + if (sc->flags & XCHK_FSHOOKS_RMAP) + xfs_rmap_hook_disable(); + sc->flags &= ~XCHK_FSHOOKS_ALL; } diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 6fe59d1a2518..cf18bb4e8b35 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -126,12 +126,14 @@ struct xfs_scrub { #define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to use intent drain */ #define XCHK_FSHOOKS_QUOTA (1 << 4) /* quota live update enabled */ #define XCHK_FSHOOKS_NLINKS (1 << 5) /* link count live update enabled */ +#define XCHK_FSHOOKS_RMAP (1 << 6) /* rmapbt live update enabled */ #define XREP_RESET_PERAG_RESV (1 << 30) /* must reset AG space reservation */ #define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */ #define XCHK_FSHOOKS_ALL (XCHK_FSHOOKS_DRAIN | \ XCHK_FSHOOKS_QUOTA | \ - XCHK_FSHOOKS_NLINKS) + XCHK_FSHOOKS_NLINKS | \ + XCHK_FSHOOKS_RMAP) /* Metadata scrubbers */ int xchk_tester(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 177fc4c75507..f8f50c5a02c0 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -18,6 +18,7 @@ #include "xfs_dir2.h" #include "xfs_da_format.h" #include "xfs_btree_mem.h" +#include "xfs_rmap.h" #include "scrub/scrub.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" diff 
--git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index f419405cb2e7..ce95c88d199b 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -26,6 +26,7 @@ struct xchk_nlink; struct xchk_fscounters; struct xfbtree; struct xfbtree_config; +struct xfs_rmap_update_params; /* * ftrace's __print_symbolic requires that all enum values be wrapped in the @@ -121,6 +122,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY); { XCHK_NEED_DRAIN, "need_drain" }, \ { XCHK_FSHOOKS_QUOTA, "fshooks_quota" }, \ { XCHK_FSHOOKS_NLINKS, "fshooks_nlinks" }, \ + { XCHK_FSHOOKS_RMAP, "fshooks_rmap" }, \ { XREP_RESET_PERAG_RESV, "reset_perag_resv" }, \ { XREP_ALREADY_FIXED, "already_fixed" } @@ -2112,6 +2114,51 @@ DEFINE_EVENT(xfbtree_freesp_class, name, \ DEFINE_XFBTREE_FREESP_EVENT(xfbtree_alloc_block); DEFINE_XFBTREE_FREESP_EVENT(xfbtree_free_block); +TRACE_DEFINE_ENUM(XFS_RMAP_MAP); +TRACE_DEFINE_ENUM(XFS_RMAP_MAP_SHARED); +TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP); +TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP_SHARED); +TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT); +TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT_SHARED); +TRACE_DEFINE_ENUM(XFS_RMAP_ALLOC); +TRACE_DEFINE_ENUM(XFS_RMAP_FREE); + +TRACE_EVENT(xrep_rmap_live_update, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int op, + const struct xfs_rmap_update_params *p), + TP_ARGS(mp, agno, op, p), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(unsigned int, op) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(uint64_t, owner) + __field(uint64_t, offset) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->op = op; + __entry->agbno = p->startblock; + __entry->len = p->blockcount; + xfs_owner_info_unpack(&p->oinfo, &__entry->owner, + &__entry->offset, &__entry->flags); + if (p->unwritten) + __entry->flags |= XFS_RMAP_UNWRITTEN; + ), + TP_printk("dev %d:%d agno 0x%x op %d agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx 
flags 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->op, + __entry->agbno, + __entry->len, + __entry->owner, + __entry->offset, + __entry->flags) +); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ -- cgit v1.2.3