From b9872a3e43dcb62d05ee10f93ce45940e0674487 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong"
Date: Fri, 22 Oct 2021 15:31:05 -0700
Subject: xfs: allow queued AG intents to drain before scrubbing

Currently, online scrub isn't sufficiently careful about quiescing
allocation groups before checking them.  While scrub does take the AG
header locks, it doesn't serialize against chains of AG update intents
that are being processed concurrently.  If there's a collision,
cross-referencing between data structures (e.g. the rmapbt and the
refcountbt) can yield false corruption events; if repair is running,
this can result in incorrect repairs.

Fix this by adding a count of active intents to the perag structure and
making scrub wait until that count drops to zero before continuing.
This is a little stupid since transactions can queue intents without
taking buffer locks, but we'll also wait for those transactions.

XXX: Should we instead have a per-AG rwsem that is taken as soon as the
AG[IF] buffers are locked and stays held until the transaction commits
or moves on to the next AG?  Or would we rather have a SIX lock, so
that intents can take an IX lock and only upgrade to X when we actually
want to make changes to that AG?  Is that even how those work?

Signed-off-by: Darrick J. Wong
---
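Note, not part of the patch: a minimal sketch of how the pag_intents
counter that these tracepoints read might be bumped, dropped, and
drained.  Only the atomic_t counter and the tracepoint names appear in
this patch; the helper bodies and the wait_var_event() pairing below
are assumptions.

/* Sketch only: take one reference per intent queued against this AG. */
static inline void
xfs_perag_bump_intents(
	struct xfs_perag	*pag)
{
	trace_xfs_perag_bump_intents(pag, __return_address);
	atomic_inc(&pag->pag_intents);
}

/* Drop a reference and wake anyone waiting for this AG to quiesce. */
static inline void
xfs_perag_drop_intents(
	struct xfs_perag	*pag)
{
	trace_xfs_perag_drop_intents(pag, __return_address);
	if (atomic_dec_and_test(&pag->pag_intents))
		wake_up_var(&pag->pag_intents);
}

/* Scrub waits here until no intent chains are running against the AG. */
static inline int
xfs_perag_wait_intents(
	struct xfs_perag	*pag)
{
	trace_xfs_perag_wait_intents(pag, __return_address);
	return wait_var_event_killable(&pag->pag_intents,
			atomic_read(&pag->pag_intents) == 0);
}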
 fs/xfs/xfs_trace.h | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)

diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7879e11115b8..3e145cc5762d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2748,6 +2748,44 @@ DEFINE_EVENT(xfs_free_extent_deferred_class, name, \
 DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_defer);
 DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_deferred);
 
+DECLARE_EVENT_CLASS(xfs_defer_pending_item_class,
+	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp,
+		 void *item),
+	TP_ARGS(mp, dfp, item),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(int, type)
+		__field(void *, intent)
+		__field(void *, item)
+		__field(char, committed)
+		__field(int, nr)
+	),
+	TP_fast_assign(
+		__entry->dev = mp ? mp->m_super->s_dev : 0;
+		__entry->type = dfp->dfp_type;
+		__entry->intent = dfp->dfp_intent;
+		__entry->item = item;
+		__entry->committed = dfp->dfp_done != NULL;
+		__entry->nr = dfp->dfp_count;
+	),
+	TP_printk("dev %d:%d optype %d intent %p item %p committed %d nr %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->type,
+		  __entry->intent,
+		  __entry->item,
+		  __entry->committed,
+		  __entry->nr)
+)
+#define DEFINE_DEFER_PENDING_ITEM_EVENT(name) \
+DEFINE_EVENT(xfs_defer_pending_item_class, name, \
+	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, \
+		 void *item), \
+	TP_ARGS(mp, dfp, item))
+
+DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_add_item);
+DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_cancel_item);
+DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_finish_item);
+
 /* rmap tracepoints */
 DECLARE_EVENT_CLASS(xfs_rmap_class,
 	TP_PROTO(struct xfs_btree_cur *cur,
@@ -4893,6 +4931,74 @@ DEFINE_IMETA_RESV_EVENT(xfs_imeta_resv_free_extent);
 DEFINE_IMETA_RESV_EVENT(xfs_imeta_resv_critical);
 DEFINE_INODE_ERROR_EVENT(xfs_imeta_resv_init_error);
 
+DECLARE_EVENT_CLASS(xfs_perag_intents_class,
+	TP_PROTO(struct xfs_perag *pag, void *caller_ip),
+	TP_ARGS(pag, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(long, nr_intents)
+		__field(void *, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = pag->pag_mount->m_super->s_dev;
+		__entry->agno = pag->pag_agno;
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB)
+		__entry->nr_intents = atomic_read(&pag->pag_intents);
+#else
+		__entry->nr_intents = -1;
+#endif
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d agno 0x%x intents %ld caller %pS",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->nr_intents,
+		  __entry->caller_ip)
+);
+
+#define DEFINE_PERAG_INTENTS_EVENT(name) \
+DEFINE_EVENT(xfs_perag_intents_class, name, \
+	TP_PROTO(struct xfs_perag *pag, void *caller_ip), \
+	TP_ARGS(pag, caller_ip))
+DEFINE_PERAG_INTENTS_EVENT(xfs_perag_bump_intents);
+DEFINE_PERAG_INTENTS_EVENT(xfs_perag_drop_intents);
+DEFINE_PERAG_INTENTS_EVENT(xfs_perag_wait_intents);
+
+DECLARE_EVENT_CLASS(xfs_rt_intents_class,
+	TP_PROTO(struct xfs_mount *mp, void *caller_ip),
+	TP_ARGS(mp, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(dev_t, rtdev)
+		__field(long, nr_intents)
+		__field(void *, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->rtdev = mp->m_rtdev_targp->bt_dev;
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) && IS_ENABLED(CONFIG_XFS_RT)
+		__entry->nr_intents = atomic_read(&mp->m_rt_intents);
+#else
+		__entry->nr_intents = -1;
+#endif
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d rtdev %d:%d intents %ld caller %pS",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+		  __entry->nr_intents,
+		  __entry->caller_ip)
+);
+
+#define DEFINE_RT_INTENTS_EVENT(name) \
+DEFINE_EVENT(xfs_rt_intents_class, name, \
+	TP_PROTO(struct xfs_mount *mp, void *caller_ip), \
+	TP_ARGS(mp, caller_ip))
+DEFINE_RT_INTENTS_EVENT(xfs_rt_bump_intents);
+DEFINE_RT_INTENTS_EVENT(xfs_rt_drop_intents);
+DEFINE_RT_INTENTS_EVENT(xfs_rt_wait_intents);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit v1.2.3
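Also not part of the patch: once it is applied, the new tracepoints can
be observed through the standard tracefs interface, for example:

    # cd /sys/kernel/tracing
    # echo 1 > events/xfs/xfs_perag_wait_intents/enable
    # cat trace_pipe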