diff options
author | Darrick J. Wong <djwong@kernel.org> | 2021-09-29 18:43:26 -0700 |
---|---|---|
committer | Darrick J. Wong <djwong@kernel.org> | 2021-10-22 16:41:16 -0700 |
commit | 89b052a624167d8f8d39c706669e3b0ae6107173 (patch) | |
tree | 81fcce35b30186cc68a7ca93e735d16108ea7b9f | |
parent | c0f26249c5af3ed541cab86313f6ebaf62e7c9a3 (diff) |
xfs: hook live rmap operations during a repair operation
Hook the regular rmap code when an rmapbt repair operation is running so
that we can unlock the AGF buffer to scan the filesystem and keep the
in-memory btree up to date during the scan.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- | fs/xfs/libxfs/xfs_ag.c | 1 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ag.h | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_rmap.c | 82 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_rmap.h | 16 | ||||
-rw-r--r-- | fs/xfs/scrub/rmap_repair.c | 129 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.c | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.h | 47 |
7 files changed, 258 insertions, 21 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index b6a9f6dde55a..9100cf7417b9 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -263,6 +263,7 @@ xfs_initialize_perag( init_waitqueue_head(&pag->pagb_wait); pag->pagb_count = 0; pag->pagb_tree = RB_ROOT; + xfs_hook_init(&pag->pag_rmap_update_hooks); error = xfs_buf_hash_init(pag); if (error) diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index cfc51a5af74a..cc52f628acb3 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -114,6 +114,9 @@ struct xfs_perag { atomic_t pag_intents; wait_queue_head_t pag_intents_wq; #endif + + /* online rmap repair stuff */ + struct xfs_hook_chain pag_rmap_update_hooks; }; int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 0f21934ddcbf..a6be6c7fe6e5 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -805,6 +805,33 @@ out_error: return error; } +/* Call a hook to capture deferred rmapbt updates in real time. */ +#ifdef CONFIG_XFS_LIVE_HOOKS +static inline void +xfs_rmap_update_hook( + struct xfs_trans *tp, + struct xfs_perag *pag, + enum xfs_rmap_intent_type op, + xfs_fsblock_t startblock, + xfs_filblks_t blockcount, + bool unwritten, + const struct xfs_owner_info *oinfo) +{ + struct xfs_rmap_update_params p = { + .tp = tp, + .startblock = startblock, + .blockcount = blockcount, + .unwritten = unwritten, + .oinfo = *oinfo, /* struct copy */ + }; + + if (pag) + xfs_hook_call(&pag->pag_rmap_update_hooks, op, &p); +} +#else +# define xfs_rmap_update_hook(t, p, o, s, b, u, oi) do { } while(0) +#endif /* CONFIG_XFS_LIVE_HOOKS */ + /* * Remove a reference to an extent in the rmap btree. 
*/ @@ -825,7 +852,7 @@ xfs_rmap_free( return 0; cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); - + xfs_rmap_update_hook(tp, pag, XFS_RMAP_UNMAP, bno, len, false, oinfo); error = xfs_rmap_unmap(cur, bno, len, false, oinfo); xfs_btree_del_cursor(cur, error); @@ -1069,6 +1096,7 @@ xfs_rmap_alloc( return 0; cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); + xfs_rmap_update_hook(tp, pag, XFS_RMAP_MAP, bno, len, false, oinfo); error = xfs_rmap_map(cur, bno, len, false, oinfo); xfs_btree_del_cursor(cur, error); @@ -2555,40 +2583,60 @@ xfs_rmap_finish_one( ri->ri_bmap.br_startoff); unwritten = ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN; - switch (ri->ri_type) { + xfs_rmap_update_hook(tp, pag, ri->ri_type, bno, + ri->ri_bmap.br_blockcount, unwritten, &oinfo); + error = __xfs_rmap_finish_intent(rcur, ri->ri_type, bno, + ri->ri_bmap.br_blockcount, &oinfo, unwritten); +out_drop: + if (pag) + xfs_perag_put(pag); + return error; +} + +/* Complete an rmap operation. */ +int +__xfs_rmap_finish_intent( + struct xfs_btree_cur *rcur, + enum xfs_rmap_intent_type op, + xfs_fsblock_t startblock, + xfs_filblks_t blockcount, + const struct xfs_owner_info *oinfo, + bool unwritten) +{ + int error; + + switch (op) { case XFS_RMAP_ALLOC: case XFS_RMAP_MAP: - error = xfs_rmap_map(rcur, bno, ri->ri_bmap.br_blockcount, - unwritten, &oinfo); + error = xfs_rmap_map(rcur, startblock, blockcount, unwritten, + oinfo); break; case XFS_RMAP_MAP_SHARED: - error = xfs_rmap_map_shared(rcur, bno, - ri->ri_bmap.br_blockcount, unwritten, &oinfo); + error = xfs_rmap_map_shared(rcur, startblock, blockcount, + unwritten, oinfo); break; case XFS_RMAP_FREE: case XFS_RMAP_UNMAP: - error = xfs_rmap_unmap(rcur, bno, ri->ri_bmap.br_blockcount, - unwritten, &oinfo); + error = xfs_rmap_unmap(rcur, startblock, blockcount, unwritten, + oinfo); break; case XFS_RMAP_UNMAP_SHARED: - error = xfs_rmap_unmap_shared(rcur, bno, - ri->ri_bmap.br_blockcount, unwritten, &oinfo); + error = xfs_rmap_unmap_shared(rcur, startblock, 
blockcount, + unwritten, oinfo); break; case XFS_RMAP_CONVERT: - error = xfs_rmap_convert(rcur, bno, ri->ri_bmap.br_blockcount, - !unwritten, &oinfo); + error = xfs_rmap_convert(rcur, startblock, blockcount, + !unwritten, oinfo); break; case XFS_RMAP_CONVERT_SHARED: - error = xfs_rmap_convert_shared(rcur, bno, - ri->ri_bmap.br_blockcount, !unwritten, &oinfo); + error = xfs_rmap_convert_shared(rcur, startblock, blockcount, + !unwritten, oinfo); break; default: ASSERT(0); error = -EFSCORRUPTED; } -out_drop: - if (pag) - xfs_perag_put(pag); + return error; } diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index aaa6e6b94f67..26d6c75c6b8f 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -192,6 +192,10 @@ void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp, struct xfs_btree_cur *rcur, int error); int xfs_rmap_finish_one(struct xfs_trans *tp, struct xfs_rmap_intent *ri, struct xfs_btree_cur **pcur); +int __xfs_rmap_finish_intent(struct xfs_btree_cur *rcur, + enum xfs_rmap_intent_type op, xfs_fsblock_t startblock, + xfs_filblks_t blockcount, const struct xfs_owner_info *oinfo, + bool unwritten); int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_fsblock_t bno, uint64_t owner, uint64_t offset, unsigned int flags, struct xfs_rmap_irec *irec, int *stat); @@ -223,4 +227,16 @@ extern const struct xfs_owner_info XFS_RMAP_OINFO_COW; int xfs_rmap_map_immediate(struct xfs_btree_cur *mcur, struct xfs_rmap_irec *irec); +/* + * Parameters for tracking reverse mapping changes. The hook function arg + * parameter is enum xfs_rmap_intent_type, and the rest is below. 
+ */ +struct xfs_rmap_update_params { + struct xfs_trans *tp; + xfs_fsblock_t startblock; + xfs_filblks_t blockcount; + struct xfs_owner_info oinfo; + bool unwritten; +}; + #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c index aca54cc7c1b0..e22b870c399f 100644 --- a/fs/xfs/scrub/rmap_repair.c +++ b/fs/xfs/scrub/rmap_repair.c @@ -151,6 +151,9 @@ struct xrep_rmap { struct xrep_newbt new_btree_info; struct xfs_btree_bload rmap_bload; + /* lock for the xfbtree and xfile */ + struct mutex lock; + /* rmap records generated from primary metadata */ struct xfbtree *rmap_btree; /* in-memory btree cursor for the xfs_btree_bload iteration */ @@ -161,6 +164,9 @@ struct xrep_rmap { /* staged rmap btree cursor */ struct xfs_btree_cur *cur; + /* Hooks into rmap update code. */ + struct notifier_block rmap_update_hook; + /* inode scan cursor */ struct xchk_iscan iscan; @@ -234,12 +240,16 @@ xrep_rmap_stash( if (xchk_should_terminate(sc, &error)) return error; + if (xchk_iscan_aborted(&rr->iscan)) + return -EFSCORRUPTED; + trace_xrep_rmap_found(sc->mp, sc->sa.pag->pag_agno, &rmap); + mutex_lock(&rr->lock); error = xfs_btree_mem_head_read_buf(rr->rmap_btree->target, sc->tp, &mhead_bp); if (error) - return error; + goto out_abort; mcur = xfs_rmapbt_mem_cursor(sc->mp, sc->tp, mhead_bp, rr->rmap_btree); error = xfs_rmap_map_immediate(mcur, &rmap); @@ -247,10 +257,18 @@ xrep_rmap_stash( if (error) goto out_cancel; - return xfbtree_trans_commit(rr->rmap_btree, sc->tp); + error = xfbtree_trans_commit(rr->rmap_btree, sc->tp); + if (error) + goto out_abort; + + mutex_unlock(&rr->lock); + return 0; out_cancel: xfbtree_trans_cancel(rr->rmap_btree, sc->tp); +out_abort: + xchk_iscan_abort(&rr->iscan); + mutex_unlock(&rr->lock); return error; } @@ -969,6 +987,13 @@ end_agscan: return error; /* + * If a hook failed to update the in-memory btree, we lack the data to + * continue the repair. 
+ */ + if (xchk_iscan_aborted(&rr->iscan)) + return -EFSCORRUPTED; + + /* * Now that we have everything locked again, we need to count the * number of rmap records stashed in the btree. This should reflect * all actively-owned space in the filesystem. At the same time, check @@ -1542,6 +1567,86 @@ out_bitmap: return error; } +static inline bool +xrep_rmapbt_want_live_update( + struct xchk_iscan *iscan, + const struct xfs_owner_info *oi) +{ + /* + * Before unlocking the AG header to perform the inode scan, we + * recorded reverse mappings for all AG metadata except for the OWN_AG + * metadata. IOWs, the in-memory btree knows about the AG headers, the + * two inode btrees, the CoW staging extents, and the refcount btrees. + * For these types of metadata, we need to record the live updates in + * the in-memory rmap btree. + * + * However, we do not scan the free space btrees or the AGFL until we + * have re-locked the AGF and are ready to reserve space for the + * new rmap btree, so we do not want live updates for OWN_AG metadata. + */ + if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner)) + return oi->oi_owner != XFS_RMAP_OWN_AG; + + /* Ignore updates to files that the scanner hasn't visited yet. */ + return xchk_iscan_want_live_update(iscan, oi->oi_owner); +} + +/* + * Apply a rmapbt update from the regular filesystem into our shadow btree. + * We're running from the thread that owns the AGF buffer and is generating + * the update, so we must be careful about which parts of the struct xrep_rmap + * that we change.
+ */ +static int +xrep_rmapbt_live_update( + struct notifier_block *nb, + unsigned long op, + void *data) +{ + struct xfs_rmap_update_params *p = data; + struct xrep_rmap *rr; + struct xfs_mount *mp; + struct xfs_btree_cur *mcur; + struct xfs_buf *mhead_bp; + int error; + + rr = container_of(nb, struct xrep_rmap, rmap_update_hook); + mp = rr->sc->mp; + + if (!xrep_rmapbt_want_live_update(&rr->iscan, &p->oinfo)) + goto out_unlock; + + trace_xrep_rmap_live_update(mp, rr->sc->sa.pag->pag_agno, op, p); + + mutex_lock(&rr->lock); + error = xfs_btree_mem_head_read_buf(rr->rmap_btree->target, p->tp, + &mhead_bp); + if (error) + goto out_abort; + + mcur = xfs_rmapbt_mem_cursor(mp, p->tp, mhead_bp, rr->rmap_btree); + error = __xfs_rmap_finish_intent(mcur, op, p->startblock, + p->blockcount, &p->oinfo, p->unwritten); + xfs_btree_del_cursor(mcur, error); + if (error) + goto out_cancel; + + error = xfbtree_trans_commit(rr->rmap_btree, p->tp); + if (error) + goto out_abort; + + mutex_unlock(&rr->lock); + return NOTIFY_DONE; + +out_cancel: + xfbtree_trans_cancel(rr->rmap_btree, p->tp); +out_abort: + mutex_unlock(&rr->lock); + xchk_iscan_abort(&rr->iscan); +out_unlock: + return NOTIFY_DONE; +} + /* Repair the rmap btree for some AG. */ int xrep_rmapbt( @@ -1554,6 +1659,7 @@ xrep_rmapbt( if (!rr) return -ENOMEM; rr->sc = sc; + mutex_init(&rr->lock); /* Set up in-memory rmap btree */ rr->rmap_btree = xfs_rmapbt_mem_create(sc->mp, "rmap btree"); @@ -1566,25 +1672,40 @@ xrep_rmapbt( xchk_iscan_start(&rr->iscan); /* + * Hook into live rmap operations so that we can update our in-memory + * btree to reflect live changes on the filesystem. Since we drop the + * AGF buffer to scan all the inodes, we need this piece to avoid + * installing a stale btree. + */ + error = xfs_hook_add(&sc->sa.pag->pag_rmap_update_hooks, + &rr->rmap_update_hook, xrep_rmapbt_live_update); + if (error) + goto out_records; + + /* * Collect rmaps for everything in this AG that isn't space metadata. 
* These rmaps won't change even as we try to allocate blocks. */ error = xrep_rmap_find_rmaps(rr); if (error) - goto out_records; + goto out_abort; /* Rebuild the rmap information. */ error = xrep_rmap_build_new_tree(rr); if (error) - goto out_records; + goto out_abort; /* Kill the old tree. */ error = xrep_rmap_remove_old_tree(rr); +out_abort: + xchk_iscan_abort(&rr->iscan); + xfs_hook_del(&sc->sa.pag->pag_rmap_update_hooks, &rr->rmap_update_hook); out_records: xchk_iscan_finish(&rr->iscan); xfbtree_destroy(rr->rmap_btree); out_rr: + mutex_destroy(&rr->lock); kmem_free(rr); return error; } diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 1dd13c488a26..dc49e4176ff0 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -16,6 +16,7 @@ #include "xfs_ag.h" #include "xfs_quota_defs.h" #include "xfs_da_format.h" +#include "xfs_rmap.h" #include "scrub/scrub.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index fc259c34e8b0..8a7c31e3855b 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -23,6 +23,7 @@ struct xfarray; struct xchk_iscan; struct xchk_nlink; struct xfbtree; +struct xfs_rmap_update_params; /* * ftrace's __print_symbolic requires that all enum values be wrapped in the @@ -2205,6 +2206,52 @@ DEFINE_EVENT(xfbtree_freesp_class, name, \ TP_ARGS(xfbt, cur, fileoff)) DEFINE_XFBTREE_FREESP_EVENT(xfbtree_alloc_block); DEFINE_XFBTREE_FREESP_EVENT(xfbtree_free_block); + +TRACE_DEFINE_ENUM(XFS_RMAP_MAP); +TRACE_DEFINE_ENUM(XFS_RMAP_MAP_SHARED); +TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP); +TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP_SHARED); +TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT); +TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT_SHARED); +TRACE_DEFINE_ENUM(XFS_RMAP_ALLOC); +TRACE_DEFINE_ENUM(XFS_RMAP_FREE); + +TRACE_EVENT(xrep_rmap_live_update, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int op, + const struct xfs_rmap_update_params *p), + TP_ARGS(mp, agno, op, p), + TP_STRUCT__entry( + 
__field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(unsigned int, op) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(uint64_t, owner) + __field(uint64_t, offset) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->op = op; + __entry->agbno = p->startblock; + __entry->len = p->blockcount; + xfs_owner_info_unpack(&p->oinfo, &__entry->owner, + &__entry->offset, &__entry->flags); + if (p->unwritten) + __entry->flags |= XFS_RMAP_UNWRITTEN; + ), + TP_printk("dev %d:%d agno 0x%x op %s agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __print_symbolic(__entry->op, XFS_RMAP_INTENT_STRINGS), + __entry->agbno, + __entry->len, + __entry->owner, + __entry->offset, + __entry->flags) +); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ |