diff options
-rw-r--r-- | fs/xfs/libxfs/xfs_ag.c | 6 |
-rw-r--r-- | fs/xfs/libxfs/xfs_ag.h | 11 |
-rw-r--r-- | fs/xfs/libxfs/xfs_defer.c | 9 |
-rw-r--r-- | fs/xfs/libxfs/xfs_defer.h | 3 |
-rw-r--r-- | fs/xfs/scrub/bmap_repair.c | 4 |
-rw-r--r-- | fs/xfs/scrub/common.c | 168 |
-rw-r--r-- | fs/xfs/scrub/common.h | 2 |
-rw-r--r-- | fs/xfs/scrub/inode_repair.c | 4 |
-rw-r--r-- | fs/xfs/scrub/repair.c | 3 |
-rw-r--r-- | fs/xfs/scrub/rtrmap_repair.c | 4 |
-rw-r--r-- | fs/xfs/scrub/trace.h | 31 |
-rw-r--r-- | fs/xfs/xfs_bmap_item.c | 49 |
-rw-r--r-- | fs/xfs/xfs_extfree_item.c | 29 |
-rw-r--r-- | fs/xfs/xfs_mount.c | 95 |
-rw-r--r-- | fs/xfs/xfs_mount.h | 33 |
-rw-r--r-- | fs/xfs/xfs_refcount_item.c | 26 |
-rw-r--r-- | fs/xfs/xfs_rmap_item.c | 25 |
-rw-r--r-- | fs/xfs/xfs_super.c | 7 |
-rw-r--r-- | fs/xfs/xfs_swapext_item.c | 10 |
-rw-r--r-- | fs/xfs/xfs_trace.h | 106 |
20 files changed, 597 insertions, 28 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index dc9d78fb7bac..b6a9f6dde55a 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -193,6 +193,9 @@ xfs_free_perag( spin_unlock(&mp->m_perag_lock); ASSERT(pag); ASSERT(atomic_read(&pag->pag_ref) == 0); +#ifdef CONFIG_XFS_ONLINE_SCRUB + ASSERT(atomic_read(&pag->pag_intents) == 0); +#endif cancel_delayed_work_sync(&pag->pag_blockgc_work); xfs_iunlink_destroy(pag); @@ -254,6 +257,9 @@ xfs_initialize_perag( spin_lock_init(&pag->pag_state_lock); INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); +#ifdef CONFIG_XFS_ONLINE_SCRUB + init_waitqueue_head(&pag->pag_intents_wq); +#endif init_waitqueue_head(&pag->pagb_wait); pag->pagb_count = 0; pag->pagb_tree = RB_ROOT; diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index c9e198e62b74..cfc51a5af74a 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -103,6 +103,17 @@ struct xfs_perag { * or have some other means to control concurrency. */ struct rhashtable pagi_unlinked_hash; + +#ifdef CONFIG_XFS_ONLINE_SCRUB + /* + * Counter of live intents. We track the number of log intent items + * that have been queued (but not yet processed) so that scrub can + * detect the presence of other threads that are in the middle of + * processing a chain of deferred items. 
+ */ + atomic_t pag_intents; + wait_queue_head_t pag_intents_wq; +#endif }; int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 3045ad184972..108d950b6f41 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -363,7 +363,8 @@ xfs_defer_cancel_list( list_for_each_safe(pwi, n, &dfp->dfp_work) { list_del(pwi); dfp->dfp_count--; - ops->cancel_item(pwi); + trace_xfs_defer_cancel_item(mp, dfp, pwi); + ops->cancel_item(mp, pwi); } ASSERT(dfp->dfp_count == 0); kmem_free(dfp); @@ -442,6 +443,7 @@ xfs_defer_finish_one( list_for_each_safe(li, n, &dfp->dfp_work) { list_del(li); dfp->dfp_count--; + trace_xfs_defer_finish_item(tp->t_mountp, dfp, li); error = ops->finish_item(tp, dfp->dfp_done, li, &state); if (error == -EAGAIN) { /* @@ -585,7 +587,7 @@ xfs_defer_add( struct list_head *li) { struct xfs_defer_pending *dfp = NULL; - const struct xfs_defer_op_type *ops; + const struct xfs_defer_op_type *ops = defer_op_types[type]; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX); @@ -598,7 +600,6 @@ xfs_defer_add( if (!list_empty(&tp->t_dfops)) { dfp = list_last_entry(&tp->t_dfops, struct xfs_defer_pending, dfp_list); - ops = defer_op_types[dfp->dfp_type]; if (dfp->dfp_type != type || (ops->max_items && dfp->dfp_count >= ops->max_items)) dfp = NULL; @@ -616,6 +617,8 @@ xfs_defer_add( } list_add_tail(li, &dfp->dfp_work); + trace_xfs_defer_add_item(tp->t_mountp, dfp, li); + ops->add_item(tp->t_mountp, li); dfp->dfp_count++; } diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index b4d23235931d..51e7c992d95e 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -55,7 +55,8 @@ struct xfs_defer_op_type { struct list_head *item, struct xfs_btree_cur **state); void (*finish_cleanup)(struct xfs_trans *tp, struct xfs_btree_cur *state, int error); - void (*cancel_item)(struct 
list_head *item); + void (*cancel_item)(struct xfs_mount *mp, struct list_head *item); + void (*add_item)(struct xfs_mount *mp, const struct list_head *item); unsigned int max_items; }; diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c index 334d970b1314..0f52356d6ed3 100644 --- a/fs/xfs/scrub/bmap_repair.c +++ b/fs/xfs/scrub/bmap_repair.c @@ -333,7 +333,9 @@ xrep_bmap_scan_rt( if (xrep_is_rtmeta_ino(sc, sc->ip->i_ino)) return 0; - xchk_rt_lock(sc, &sc->sr); + error = xchk_rt_lock(sc, &sc->sr); + if (error) + return error; xrep_rt_btcur_init(sc, &sc->sr); error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_bmap_walk_rtrmap, rb); xchk_rt_btcur_free(&sc->sr); diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 0ea9b6b299ae..323179b3d17a 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -484,7 +484,35 @@ want_ag_read_header_failure( * * The headers should be released by xchk_ag_free, but as a fail safe we attach * all the buffers we grab to the scrub transaction so they'll all be freed - * when we cancel it. Returns ENOENT if we can't grab the perag structure. + * when we cancel it. + */ +static inline int +__xchk_ag_read_headers( + struct xfs_scrub *sc, + xfs_agnumber_t agno, + struct xchk_ag *sa) +{ + struct xfs_mount *mp = sc->mp; + int error; + + error = xfs_ialloc_read_agi(mp, sc->tp, agno, &sa->agi_bp); + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) + return error; + + error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &sa->agf_bp); + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) + return error; + + error = xfs_alloc_read_agfl(mp, sc->tp, agno, &sa->agfl_bp); + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) + return error; + + return 0; +} + +/* + * Grab all the headers for an AG, and wait until there aren't any pending + * intents. Returns -ENOENT if we can't grab the perag structure. 
*/ int xchk_ag_read_headers( @@ -502,29 +530,83 @@ xchk_ag_read_headers( return xchk_ag_lock(sc); } -/* Lock the AG headers. */ +static inline bool +xchk_ag_intents_pending( + struct xfs_perag *pag) +{ + int intents = atomic_read(&pag->pag_intents); + + trace_xchk_ag_read_headers(pag->pag_mount, pag->pag_agno, intents, + _RET_IP_); + + return intents > 0; +} + +/* Lock the AG headers, waiting for pending intents to drain. */ int xchk_ag_lock( struct xfs_scrub *sc) { - struct xfs_mount *mp = sc->mp; struct xchk_ag *sa = &sc->sa; - xfs_agnumber_t agno = sa->pag->pag_agno; - int error; + int error = 0; - error = xfs_ialloc_read_agi(mp, sc->tp, agno, &sa->agi_bp); - if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) - return error; + ASSERT(sa->pag != NULL); + ASSERT(sa->agi_bp == NULL); + ASSERT(sa->agf_bp == NULL); + ASSERT(sa->agfl_bp == NULL); - error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &sa->agf_bp); - if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) - return error; + do { + if (xchk_should_terminate(sc, &error)) + break; - error = xfs_alloc_read_agfl(mp, sc->tp, agno, &sa->agfl_bp); - if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) - return error; + error = __xchk_ag_read_headers(sc, sa->pag->pag_agno, sa); + if (error) + break; - return 0; + /* + * Decide if this AG is quiet enough for all metadata to be + * consistent with each other. XFS allows the AG header buffer + * locks to cycle across transaction rolls while processing + * chains of deferred ops, which means that there could be + * other threads in the middle of processing a chain of + * deferred ops. For regular operations we are careful about + * ordering operations to prevent collisions between threads + * (which is why we don't need a per-AG lock), but scrub and + * repair have to serialize against chained operations. + * + * We just locked all the AG headers buffers; now take a look + * to see if there are any intents in progress. 
If there are, + * drop the AG headers and wait for the intents to drain. + * Since we hold all the AG header locks for the duration of + * the scrub, this is the only time we have to sample the + * intents counter; any threads increasing it after this point + * can't possibly be in the middle of a chain of AG metadata + * updates. + */ + if (!xchk_ag_intents_pending(sa->pag)) { + error = 0; + break; + } + + if (sa->agfl_bp) { + xfs_trans_brelse(sc->tp, sa->agfl_bp); + sa->agfl_bp = NULL; + } + + if (sa->agf_bp) { + xfs_trans_brelse(sc->tp, sa->agf_bp); + sa->agf_bp = NULL; + } + + if (sa->agi_bp) { + xfs_trans_brelse(sc->tp, sa->agi_bp); + sa->agi_bp = NULL; + } + + error = xfs_perag_wait_intents(sa->pag); + } while (!error); + + return error; } /* Release all the AG btree cursors. */ @@ -653,14 +735,62 @@ xchk_ag_init( return 0; } -/* Lock everything we need to work on realtime metadata. */ -void +#if IS_ENABLED(CONFIG_XFS_RT) +static inline bool +xchk_rt_intents_pending( + struct xfs_mount *mp) +{ + int intents = atomic_read(&mp->m_rt_intents); + + trace_xchk_rt_lock(mp, -1U, intents, _RET_IP_); + + return intents > 0; +} +#else +# define xchk_rt_intents_pending(mp) (false) +#endif + +/* Lock everything we need to work on realtime metadata and wait for intents. */ +int xchk_rt_lock( struct xfs_scrub *sc, struct xchk_rt *sr) { - xfs_rtlock(NULL, sc->mp, XFS_RTLOCK_ALL); - sr->locked = true; + int error = 0; + + do { + if (xchk_should_terminate(sc, &error)) + break; + + xfs_rtlock(NULL, sc->mp, XFS_RTLOCK_ALL); + + /* + * Decide if the RT volume is quiet enough for all metadata to + * be consistent with each other. Regular file IO doesn't get + * to lock all the rt inodes at the same time, which means that + * there could be other threads in the middle of processing a + * chain of deferred ops. + * + * We just locked all the rt inodes; now take a look to see if + * there are any rt intents in progress. 
If there are, drop + * the rt inode locks and wait for the intents to drain. Since + * we hold the rt inode locks for the duration of the scrub, + * this is the only time we have to sample the intents counter; + * any threads increasing it after this point can't possibly be + * in the middle of a chain of rt metadata updates. + */ + if (!xchk_rt_intents_pending(sc->mp)) { + sr->locked = true; + error = 0; + break; + } + + xfs_rtunlock(sc->mp, XFS_RTLOCK_ALL); + + error = xfs_rt_wait_intents(sc->mp); + } while (!error); + + return error; } /* diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 74f0606174df..819bb7e2007a 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -157,7 +157,7 @@ xchk_ag_init_existing( void xchk_rt_init(struct xfs_scrub *sc, struct xchk_rt *sr); void xchk_rt_btcur_free(struct xchk_rt *sr); -void xchk_rt_lock(struct xfs_scrub *sc, struct xchk_rt *sr); +int xchk_rt_lock(struct xfs_scrub *sc, struct xchk_rt *sr); void xchk_rt_unlock(struct xfs_scrub *sc, struct xchk_rt *sr); int xchk_ag_read_headers(struct xfs_scrub *sc, xfs_agnumber_t agno, struct xchk_ag *sa); diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 97605313f097..8eec28aa1a95 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -597,7 +597,9 @@ xrep_dinode_count_rt_rmaps( xrep_is_rtmeta_ino(sc, sc->sm->sm_ino)) return 0; - xchk_rt_lock(sc, &sc->sr); + error = xchk_rt_lock(sc, &sc->sr); + if (error) + return error; xrep_rt_btcur_init(sc, &sc->sr); error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_dinode_walk_rtrmap, dis); diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 47592ca215c6..e35c2c8b0513 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -477,6 +477,7 @@ xrep_newbt_schedule_reap( INIT_LIST_HEAD(&efi_item.xefi_list); list_add(&efi_item.xefi_list, &items); + xfs_fs_bump_intents(xnr->sc->mp, false, resv->fsbno); resv->efi = 
xfs_extent_free_defer_type.create_intent(xnr->sc->tp, &items, 1, false); } @@ -698,6 +699,7 @@ xrep_newbt_destroy( goto junkit; list_del(&resv->list); + xfs_fs_drop_intents(sc->mp, false, resv->fsbno); kmem_free(resv); } @@ -710,6 +712,7 @@ junkit: list_for_each_entry_safe(resv, n, &xnr->resv_list, list) { xfs_extent_free_defer_type.abort_intent(resv->efi); list_del(&resv->list); + xfs_fs_drop_intents(sc->mp, false, resv->fsbno); kmem_free(resv); } diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c index f669f51f6f70..d68ef4d1ec16 100644 --- a/fs/xfs/scrub/rtrmap_repair.c +++ b/fs/xfs/scrub/rtrmap_repair.c @@ -569,7 +569,9 @@ xrep_rtrmap_find_rmaps( error = xchk_setup_fs(sc); if (error) return error; - xchk_rt_lock(sc, &sc->sr); + error = xchk_rt_lock(sc, &sc->sr); + if (error) + return error; /* Scan for old rtrmap blocks. */ for_each_perag(sc->mp, agno, pag) { diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 9878ee415e8b..5c43578a174e 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -717,6 +717,37 @@ TRACE_EVENT(xchk_iallocbt_check_cluster, __entry->cluster_ino) ) +DECLARE_EVENT_CLASS(xchk_ag_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, + unsigned long caller_ip), + TP_ARGS(mp, agno, refcount, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, refcount) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->refcount = refcount; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno %u refcount %d caller %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->refcount, + (char *)__entry->caller_ip) +); + +#define DEFINE_XCHK_AG_EVENT(name) \ +DEFINE_EVENT(xchk_ag_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ + unsigned long caller_ip), \ + TP_ARGS(mp, agno, refcount, caller_ip)) 
+DEFINE_XCHK_AG_EVENT(xchk_ag_read_headers); +DEFINE_XCHK_AG_EVENT(xchk_rt_lock); + TRACE_EVENT(xchk_fscounters_calc, TP_PROTO(struct xfs_mount *mp, uint64_t icount, uint64_t ifree, uint64_t fdblocks, uint64_t delalloc), diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 460876300451..d7bf99f10e4c 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -351,14 +351,30 @@ xfs_bmap_update_finish_item( struct xfs_btree_cur **state) { struct xfs_bmap_intent *bi; + struct xfs_mount *mp = tp->t_mountp; + xfs_fsblock_t orig_startblock; int error; bi = container_of(item, struct xfs_bmap_intent, bi_list); + orig_startblock = bi->bi_bmap.br_startblock; error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), bi); if (!error && bi->bi_bmap.br_blockcount > 0) { ASSERT(bi->bi_type == XFS_BMAP_UNMAP); return -EAGAIN; } + + /* + * Drop our intent counter reference now that we've either queued a + * deferred rmap intent or failed. Be careful to use the original + * startblock since the finishing functions can update the intent + * state. + */ + if (xfs_has_rmapbt(mp)) { + bool rt = xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork); + + xfs_fs_drop_intents(mp, rt, orig_startblock); + } + kmem_free(bi); return error; } @@ -371,17 +387,47 @@ xfs_bmap_update_abort_intent( xfs_bui_release(BUI_ITEM(intent)); } -/* Cancel a deferred rmap update. */ +/* Cancel a deferred bmap update. */ STATIC void xfs_bmap_update_cancel_item( + struct xfs_mount *mp, struct list_head *item) { struct xfs_bmap_intent *bi; bi = container_of(item, struct xfs_bmap_intent, bi_list); + + /* Drop our intent counter reference since we're going away. */ + if (xfs_has_rmapbt(mp)) { + bool rt = xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork); + + xfs_fs_drop_intents(mp, rt, bi->bi_bmap.br_startblock); + } + kmem_free(bi); } +/* Add a deferred bmap update. 
*/ +STATIC void +xfs_bmap_update_add_item( + struct xfs_mount *mp, + const struct list_head *item) +{ + const struct xfs_bmap_intent *bi; + + bi = container_of(item, struct xfs_bmap_intent, bi_list); + + /* + * Grab an intent counter reference on behalf of the deferred rmap + * intent item that we will queue when we finish this bmap work. + */ + if (xfs_has_rmapbt(mp)) { + bool rt = xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork); + + xfs_fs_bump_intents(mp, rt, bi->bi_bmap.br_startblock); + } +} + const struct xfs_defer_op_type xfs_bmap_update_defer_type = { .max_items = XFS_BUI_MAX_FAST_EXTENTS, .create_intent = xfs_bmap_update_create_intent, @@ -389,6 +435,7 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = { .create_done = xfs_bmap_update_create_done, .finish_item = xfs_bmap_update_finish_item, .cancel_item = xfs_bmap_update_cancel_item, + .add_item = xfs_bmap_update_add_item, }; /* Is this recovered BUI ok? */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index f93b033b447e..e38e428eeb59 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -487,6 +487,7 @@ xfs_extent_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { + struct xfs_mount *mp = tp->t_mountp; struct xfs_extent_free_item *xefi; int error; @@ -502,6 +503,14 @@ xfs_extent_free_finish_item( } error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi); + + /* + * Drop our intent counter reference now that we've finished all the + * work or failed. The finishing function doesn't update the intent + * state, so we need not preserve the original startblock. + */ + xfs_fs_drop_intents(mp, xefi->xefi_realtime, xefi->xefi_startblock); + kmem_free(xefi); return error; } @@ -517,14 +526,30 @@ xfs_extent_free_abort_intent( /* Cancel a free extent. 
*/ STATIC void xfs_extent_free_cancel_item( + struct xfs_mount *mp, struct list_head *item) { struct xfs_extent_free_item *xefi; xefi = container_of(item, struct xfs_extent_free_item, xefi_list); + xfs_fs_drop_intents(mp, xefi->xefi_realtime, xefi->xefi_startblock); kmem_free(xefi); } +/* Add a deferred free extent. */ +STATIC void +xfs_extent_free_add_item( + struct xfs_mount *mp, + const struct list_head *item) +{ + const struct xfs_extent_free_item *xefi; + + xefi = container_of(item, struct xfs_extent_free_item, xefi_list); + + /* Grab an intent counter reference for this intent item. */ + xfs_fs_bump_intents(mp, xefi->xefi_realtime, xefi->xefi_startblock); +} + const struct xfs_defer_op_type xfs_extent_free_defer_type = { .max_items = XFS_EFI_MAX_FAST_EXTENTS, .create_intent = xfs_extent_free_create_intent, @@ -532,6 +557,7 @@ const struct xfs_defer_op_type xfs_extent_free_defer_type = { .create_done = xfs_extent_free_create_done, .finish_item = xfs_extent_free_finish_item, .cancel_item = xfs_extent_free_cancel_item, + .add_item = xfs_extent_free_add_item, }; /* @@ -585,6 +611,8 @@ xfs_agfl_free_finish_item( extp->ext_len = xefi->xefi_blockcount; efdp->efd_next_extent++; + xfs_fs_drop_intents(mp, xefi->xefi_realtime, xefi->xefi_startblock); + kmem_free(xefi); return error; } @@ -597,6 +625,7 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = { .create_done = xfs_extent_free_create_done, .finish_item = xfs_agfl_free_finish_item, .cancel_item = xfs_extent_free_cancel_item, + .add_item = xfs_extent_free_add_item, }; /* Is this recovered EFI ok? 
*/ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index f0643442aecc..b02be0019ac8 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1467,3 +1467,98 @@ xfs_hook_call( return srcu_notifier_call_chain(&chain->head, val, priv); } #endif /* CONFIG_XFS_LIVE_HOOKS */ + +#ifdef CONFIG_XFS_ONLINE_SCRUB +# ifdef CONFIG_XFS_RT +static inline void +xfs_rt_bump_intents( + struct xfs_mount *mp) +{ + trace_xfs_rt_bump_intents(mp, __return_address); + + atomic_inc(&mp->m_rt_intents); +} + +static inline void +xfs_rt_drop_intents( + struct xfs_mount *mp) +{ + trace_xfs_rt_drop_intents(mp, __return_address); + + ASSERT(atomic_read(&mp->m_rt_intents) > 0); + + if (atomic_dec_and_test(&mp->m_rt_intents)) + wake_up(&mp->m_rt_intents_wq); +} + +int +xfs_rt_wait_intents( + struct xfs_mount *mp) +{ + trace_xfs_rt_wait_intents(mp, __return_address); + + return wait_event_killable(mp->m_rt_intents_wq, + atomic_read(&mp->m_rt_intents) == 0); +} +# else +static inline void xfs_rt_bump_intents(struct xfs_mount *mp) { } +static inline void xfs_rt_drop_intents(struct xfs_mount *mp) { } +# endif /* CONFIG_XFS_RT */ + +static inline void +xfs_ag_bump_intents( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag = xfs_perag_get(mp, agno); + + trace_xfs_perag_bump_intents(pag, __return_address); + + atomic_inc(&pag->pag_intents); + xfs_perag_put(pag); +} + +static inline void +xfs_ag_drop_intents( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag = xfs_perag_get(mp, agno); + + trace_xfs_perag_drop_intents(pag, __return_address); + + ASSERT(atomic_read(&pag->pag_intents) > 0); + + if (atomic_dec_and_test(&pag->pag_intents)) + wake_up(&pag->pag_intents_wq); + xfs_perag_put(pag); +} + +void +xfs_fs_bump_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb) +{ + if (isrt) + xfs_rt_bump_intents(mp); + else + xfs_ag_bump_intents(mp, XFS_FSB_TO_AGNO(mp, fsb)); +} + +void +xfs_fs_drop_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t 
fsb) +{ + if (isrt) + xfs_rt_drop_intents(mp); + else + xfs_ag_drop_intents(mp, XFS_FSB_TO_AGNO(mp, fsb)); +} + +int +xfs_perag_wait_intents( + struct xfs_perag *pag) +{ + trace_xfs_perag_wait_intents(pag, __return_address); + + return wait_event_killable(pag->pag_intents_wq, + atomic_read(&pag->pag_intents) == 0); +} +#endif /* CONFIG_XFS_ONLINE_SCRUB */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 8ff3fc3e83de..05b791aa24d1 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -274,6 +274,17 @@ typedef struct xfs_mount { /* online file link count check stuff */ struct xfs_hook_chain m_nlink_delta_hooks; + +#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) && IS_ENABLED(CONFIG_XFS_RT) + /* + * Counter of live intents. We track the number of log intent items + * that have been queued (but not yet processed) so that scrub can + * detect the presence of other threads that are in the middle of + * processing a chain of deferred items. + */ + atomic_t m_rt_intents; + wait_queue_head_t m_rt_intents_wq; +#endif } xfs_mount_t; /* @@ -593,4 +604,26 @@ struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp, void xfs_force_summary_recalc(struct xfs_mount *mp); void xfs_mod_delalloc(struct xfs_mount *mp, int64_t delta); +#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) +# if IS_ENABLED(CONFIG_XFS_RT) +int xfs_rt_wait_intents(struct xfs_mount *mp); +# else +# define xfs_rt_wait_intents(mp) (-ENOSYS) +# endif /* CONFIG_XFS_RT */ + +void xfs_fs_bump_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb); +void xfs_fs_drop_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb); +int xfs_perag_wait_intents(struct xfs_perag *pag); + +#else +static inline void +xfs_fs_bump_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb) { } +static inline void +xfs_fs_drop_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb) { } + +int xfs_perag_wait_intents(struct xfs_perag *pag); +# define xfs_perag_wait_intents(pag) (-ENOSYS) +# define xfs_rt_wait_intents(mp) 
(-ENOSYS) +#endif /* CONFIG_XFS_ONLINE_SCRUB */ + #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 5812e6e1fc06..b50248f1df8a 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -364,9 +364,12 @@ xfs_refcount_update_finish_item( struct xfs_btree_cur **state) { struct xfs_refcount_intent *ri; + struct xfs_mount *mp = tp->t_mountp; + xfs_fsblock_t orig_startblock; int error; ri = container_of(item, struct xfs_refcount_intent, ri_list); + orig_startblock = ri->ri_startblock; error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done), ri, state); @@ -376,6 +379,13 @@ xfs_refcount_update_finish_item( ri->ri_type == XFS_REFCOUNT_DECREASE); return -EAGAIN; } + + /* + * Drop our intent counter reference now that we've finished all the + * work or failed. Be careful to use the original startblock because + * the finishing functions can update the intent state. + */ + xfs_fs_drop_intents(mp, ri->ri_realtime, orig_startblock); kmem_free(ri); return error; } @@ -391,14 +401,29 @@ xfs_refcount_update_abort_intent( /* Cancel a deferred refcount update. */ STATIC void xfs_refcount_update_cancel_item( + struct xfs_mount *mp, struct list_head *item) { struct xfs_refcount_intent *ri; ri = container_of(item, struct xfs_refcount_intent, ri_list); + xfs_fs_drop_intents(mp, ri->ri_realtime, ri->ri_startblock); kmem_free(ri); } +/* Add a deferred refcount update. */ +STATIC void +xfs_refcount_update_add_item( + struct xfs_mount *mp, + const struct list_head *item) +{ + const struct xfs_refcount_intent *ri; + + /* Grab an intent counter reference for this intent item. 
*/ + ri = container_of(item, struct xfs_refcount_intent, ri_list); + xfs_fs_bump_intents(mp, ri->ri_realtime, ri->ri_startblock); +} + const struct xfs_defer_op_type xfs_refcount_update_defer_type = { .max_items = XFS_CUI_MAX_FAST_EXTENTS, .create_intent = xfs_refcount_update_create_intent, @@ -407,6 +432,7 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = { .finish_item = xfs_refcount_update_finish_item, .finish_cleanup = xfs_refcount_finish_one_cleanup, .cancel_item = xfs_refcount_update_cancel_item, + .add_item = xfs_refcount_update_add_item, }; /* Is this recovered CUI ok? */ diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index f08ed05c98be..de6b122ac126 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -411,11 +411,19 @@ xfs_rmap_update_finish_item( struct xfs_btree_cur **state) { struct xfs_rmap_intent *ri; + struct xfs_mount *mp = tp->t_mountp; int error; ri = container_of(item, struct xfs_rmap_intent, ri_list); error = xfs_trans_log_finish_rmap_update(tp, RUD_ITEM(done), ri, state); + + /* + * Drop our intent counter reference now that we've finished all the + * work or failed. The finishing function doesn't update the intent + * state, so we need not preserve the original startblock. + */ + xfs_fs_drop_intents(mp, ri->ri_realtime, ri->ri_bmap.br_startblock); kmem_free(ri); return error; } @@ -431,14 +439,30 @@ xfs_rmap_update_abort_intent( /* Cancel a deferred rmap update. */ STATIC void xfs_rmap_update_cancel_item( + struct xfs_mount *mp, struct list_head *item) { struct xfs_rmap_intent *ri; ri = container_of(item, struct xfs_rmap_intent, ri_list); + xfs_fs_drop_intents(mp, ri->ri_realtime, ri->ri_bmap.br_startblock); kmem_free(ri); } +/* Add a deferred rmap update. 
*/ +STATIC void +xfs_rmap_update_add_item( + struct xfs_mount *mp, + const struct list_head *item) +{ + const struct xfs_rmap_intent *ri; + + ri = container_of(item, struct xfs_rmap_intent, ri_list); + + /* Grab an intent counter reference for this intent item. */ + xfs_fs_bump_intents(mp, ri->ri_realtime, ri->ri_bmap.br_startblock); +} + const struct xfs_defer_op_type xfs_rmap_update_defer_type = { .max_items = XFS_RUI_MAX_FAST_EXTENTS, .create_intent = xfs_rmap_update_create_intent, @@ -447,6 +471,7 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = { .finish_item = xfs_rmap_update_finish_item, .finish_cleanup = xfs_rmap_finish_one_cleanup, .cancel_item = xfs_rmap_update_cancel_item, + .add_item = xfs_rmap_update_add_item, }; /* Is this recovered RUI ok? */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f0f2e478a794..c3f3c669a1dc 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -733,6 +733,9 @@ xfs_mount_free( ASSERT(!mutex_is_locked(&mp->m_scrub_freeze)); mutex_destroy(&mp->m_scrub_freeze); +#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) && IS_ENABLED(CONFIG_XFS_RT) + ASSERT(atomic_read(&mp->m_rt_intents) == 0); +#endif kmem_free(mp); } @@ -1985,6 +1988,10 @@ static int xfs_init_fs_context( INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); mp->m_kobj.kobject.kset = xfs_kset; +#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) && IS_ENABLED(CONFIG_XFS_RT) + init_waitqueue_head(&mp->m_rt_intents_wq); + atomic_set(&mp->m_rt_intents, 0); +#endif /* * We don't create the finobt per-ag space reservation until after log * recovery, so we must set this to true so that an ifree transaction diff --git a/fs/xfs/xfs_swapext_item.c b/fs/xfs/xfs_swapext_item.c index 95041fe69ba7..93d1f27cf3b9 100644 --- a/fs/xfs/xfs_swapext_item.c +++ b/fs/xfs/xfs_swapext_item.c @@ -346,6 +346,7 @@ xfs_swapext_abort_intent( /* Cancel a deferred swapext update. 
*/ STATIC void xfs_swapext_cancel_item( + struct xfs_mount *mp, struct list_head *item) { struct xfs_swapext_intent *sxi; @@ -354,6 +355,14 @@ xfs_swapext_cancel_item( kmem_free(sxi); } +/* Add a deferred swapext update. */ +STATIC void +xfs_swapext_add_item( + struct xfs_mount *mp, + const struct list_head *item) +{ +} + const struct xfs_defer_op_type xfs_swapext_defer_type = { .max_items = XFS_SXI_MAX_FAST_EXTENTS, .create_intent = xfs_swapext_create_intent, @@ -361,6 +370,7 @@ const struct xfs_defer_op_type xfs_swapext_defer_type = { .create_done = xfs_swapext_create_done, .finish_item = xfs_swapext_finish_item, .cancel_item = xfs_swapext_cancel_item, + .add_item = xfs_swapext_add_item, }; /* Is this recovered SXI ok? */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7879e11115b8..3e145cc5762d 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2748,6 +2748,44 @@ DEFINE_EVENT(xfs_free_extent_deferred_class, name, \ DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_defer); DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_deferred); +DECLARE_EVENT_CLASS(xfs_defer_pending_item_class, + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, + void *item), + TP_ARGS(mp, dfp, item), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(void *, intent) + __field(void *, item) + __field(char, committed) + __field(int, nr) + ), + TP_fast_assign( + __entry->dev = mp ? 
mp->m_super->s_dev : 0; + __entry->type = dfp->dfp_type; + __entry->intent = dfp->dfp_intent; + __entry->item = item; + __entry->committed = dfp->dfp_done != NULL; + __entry->nr = dfp->dfp_count; + ), + TP_printk("dev %d:%d optype %d intent %p item %p committed %d nr %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->intent, + __entry->item, + __entry->committed, + __entry->nr) +) +#define DEFINE_DEFER_PENDING_ITEM_EVENT(name) \ +DEFINE_EVENT(xfs_defer_pending_item_class, name, \ + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, \ + void *item), \ + TP_ARGS(mp, dfp, item)) + +DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_add_item); +DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_cancel_item); +DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_finish_item); + /* rmap tracepoints */ DECLARE_EVENT_CLASS(xfs_rmap_class, TP_PROTO(struct xfs_btree_cur *cur, @@ -4893,6 +4931,74 @@ DEFINE_IMETA_RESV_EVENT(xfs_imeta_resv_free_extent); DEFINE_IMETA_RESV_EVENT(xfs_imeta_resv_critical); DEFINE_INODE_ERROR_EVENT(xfs_imeta_resv_init_error); +DECLARE_EVENT_CLASS(xfs_perag_intents_class, + TP_PROTO(struct xfs_perag *pag, void *caller_ip), + TP_ARGS(pag, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(long, nr_intents) + __field(void *, caller_ip) + ), + TP_fast_assign( + __entry->dev = pag->pag_mount->m_super->s_dev; + __entry->agno = pag->pag_agno; +#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) + __entry->nr_intents = atomic_read(&pag->pag_intents); +#else + __entry->nr_intents = -1; +#endif + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno 0x%x intents %ld caller %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->nr_intents, + __entry->caller_ip) +); + +#define DEFINE_PERAG_INTENTS_EVENT(name) \ +DEFINE_EVENT(xfs_perag_intents_class, name, \ + TP_PROTO(struct xfs_perag *pag, void *caller_ip), \ + TP_ARGS(pag, caller_ip)) 
+DEFINE_PERAG_INTENTS_EVENT(xfs_perag_bump_intents); +DEFINE_PERAG_INTENTS_EVENT(xfs_perag_drop_intents); +DEFINE_PERAG_INTENTS_EVENT(xfs_perag_wait_intents); + +DECLARE_EVENT_CLASS(xfs_rt_intents_class, + TP_PROTO(struct xfs_mount *mp, void *caller_ip), + TP_ARGS(mp, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, rtdev) + __field(long, nr_intents) + __field(void *, caller_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->rtdev = mp->m_rtdev_targp->bt_dev; +#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) && IS_ENABLED(CONFIG_XFS_RT) + __entry->nr_intents = atomic_read(&mp->m_rt_intents); +#else + __entry->nr_intents = -1; +#endif + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d rtdev %d:%d intents %ld caller %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->rtdev), MINOR(__entry->rtdev), + __entry->nr_intents, + __entry->caller_ip) +); + +#define DEFINE_RT_INTENTS_EVENT(name) \ +DEFINE_EVENT(xfs_rt_intents_class, name, \ + TP_PROTO(struct xfs_mount *mp, void *caller_ip), \ + TP_ARGS(mp, caller_ip)) +DEFINE_RT_INTENTS_EVENT(xfs_rt_bump_intents); +DEFINE_RT_INTENTS_EVENT(xfs_rt_drop_intents); +DEFINE_RT_INTENTS_EVENT(xfs_rt_wait_intents); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH |