diff options
-rw-r--r-- | fs/xfs/Kconfig | 4 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ag.c | 4 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ag.h | 9 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_defer.c | 9 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_defer.h | 3 | ||||
-rw-r--r-- | fs/xfs/scrub/bmap.c | 8 | ||||
-rw-r--r-- | fs/xfs/scrub/bmap_repair.c | 4 | ||||
-rw-r--r-- | fs/xfs/scrub/common.c | 162 | ||||
-rw-r--r-- | fs/xfs/scrub/common.h | 4 | ||||
-rw-r--r-- | fs/xfs/scrub/inode_repair.c | 4 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.c | 3 | ||||
-rw-r--r-- | fs/xfs/scrub/rtbitmap.c | 3 | ||||
-rw-r--r-- | fs/xfs/scrub/rtrefcount.c | 3 | ||||
-rw-r--r-- | fs/xfs/scrub/rtrmap.c | 3 | ||||
-rw-r--r-- | fs/xfs/scrub/rtrmap_repair.c | 4 | ||||
-rw-r--r-- | fs/xfs/scrub/rtsummary.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_item.c | 53 | ||||
-rw-r--r-- | fs/xfs/xfs_extfree_item.c | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 94 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 68 | ||||
-rw-r--r-- | fs/xfs/xfs_refcount_item.c | 25 | ||||
-rw-r--r-- | fs/xfs/xfs_rmap_item.c | 17 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_swapext_item.c | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 102 |
25 files changed, 599 insertions, 38 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 7d08a8e92c92..5ad64e173672 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -96,6 +96,9 @@ config XFS_RT config XFS_LIVE_HOOKS bool +config XFS_DRAIN_INTENTS + bool + config XFS_ONLINE_SCRUB bool "XFS online metadata check support" default n @@ -103,6 +106,7 @@ config XFS_ONLINE_SCRUB depends on TMPFS && SHMEM depends on SRCU select XFS_LIVE_HOOKS + select XFS_DRAIN_INTENTS help If you say Y here you will be able to check metadata on a mounted XFS filesystem. This feature is intended to reduce diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 30ded0376db2..2da6af33c826 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -193,6 +193,7 @@ xfs_free_perag( spin_unlock(&mp->m_perag_lock); ASSERT(pag); ASSERT(atomic_read(&pag->pag_ref) == 0); + xfs_drain_free(&pag->pag_intents); cancel_delayed_work_sync(&pag->pag_blockgc_work); xfs_iunlink_destroy(pag); @@ -255,6 +256,7 @@ xfs_initialize_perag( spin_lock_init(&pag->pag_state_lock); INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + xfs_drain_init(&pag->pag_intents); init_waitqueue_head(&pag->pagb_wait); pag->pagb_count = 0; pag->pagb_tree = RB_ROOT; @@ -283,6 +285,7 @@ xfs_initialize_perag( out_hash_destroy: xfs_buf_hash_destroy(pag); + xfs_drain_free(&pag->pag_intents); out_remove_pag: radix_tree_delete(&mp->m_perag_tree, index); out_free_pag: @@ -294,6 +297,7 @@ out_unwind_new_pags: if (!pag) break; xfs_buf_hash_destroy(pag); + xfs_drain_free(&pag->pag_intents); xfs_iunlink_destroy(pag); kmem_free(pag); } diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index aae98fda12f6..2edd4c4ff07c 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -104,6 +104,15 @@ struct xfs_perag { * or have some other means to control concurrency. 
*/ struct rhashtable pagi_unlinked_hash; + + /* + * We use xfs_drain to track the number of deferred log intent items + * that have been queued (but not yet processed) so that waiters (e.g. + * scrub) will not lock resources when other threads are in the middle + * of processing a chain of intent items only to find momentary + * inconsistencies. + */ + struct xfs_drain pag_intents; #endif /* __KERNEL__ */ }; diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index ccb834a82e2e..59f8a2af39d5 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -370,7 +370,8 @@ xfs_defer_cancel_list( list_for_each_safe(pwi, n, &dfp->dfp_work) { list_del(pwi); dfp->dfp_count--; - ops->cancel_item(pwi); + trace_xfs_defer_cancel_item(mp, dfp, pwi); + ops->cancel_item(mp, pwi); } ASSERT(dfp->dfp_count == 0); kmem_cache_free(xfs_defer_pending_cache, dfp); @@ -449,6 +450,7 @@ xfs_defer_finish_one( list_for_each_safe(li, n, &dfp->dfp_work) { list_del(li); dfp->dfp_count--; + trace_xfs_defer_finish_item(tp->t_mountp, dfp, li); error = ops->finish_item(tp, dfp->dfp_done, li, &state); if (error == -EAGAIN) { /* @@ -592,7 +594,7 @@ xfs_defer_add( struct list_head *li) { struct xfs_defer_pending *dfp = NULL; - const struct xfs_defer_op_type *ops; + const struct xfs_defer_op_type *ops = defer_op_types[type]; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX); @@ -605,7 +607,6 @@ xfs_defer_add( if (!list_empty(&tp->t_dfops)) { dfp = list_last_entry(&tp->t_dfops, struct xfs_defer_pending, dfp_list); - ops = defer_op_types[dfp->dfp_type]; if (dfp->dfp_type != type || (ops->max_items && dfp->dfp_count >= ops->max_items)) dfp = NULL; @@ -623,6 +624,8 @@ xfs_defer_add( } list_add_tail(li, &dfp->dfp_work); + trace_xfs_defer_add_item(tp->t_mountp, dfp, li); + ops->add_item(tp->t_mountp, li); dfp->dfp_count++; } diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 
89237d8fdedd..6c575b7aacfb 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -55,7 +55,8 @@ struct xfs_defer_op_type { struct list_head *item, struct xfs_btree_cur **state); void (*finish_cleanup)(struct xfs_trans *tp, struct xfs_btree_cur *state, int error); - void (*cancel_item)(struct list_head *item); + void (*cancel_item)(struct xfs_mount *mp, struct list_head *item); + void (*add_item)(struct xfs_mount *mp, const struct list_head *item); unsigned int max_items; }; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 9ac01b24c9b0..dc472aa6f548 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -318,7 +318,12 @@ xchk_bmap_rt_iextent_xref( struct xchk_bmap_info *info, struct xfs_bmbt_irec *irec) { - xchk_rt_init(info->sc, &info->sc->sr); + int error; + + error = xchk_rt_init(info->sc, &info->sc->sr); + if (!xchk_fblock_process_error(info->sc, info->whichfork, + irec->br_startoff, &error)) + goto out_free; xchk_xref_is_used_rt_space(info->sc, irec->br_startblock, irec->br_blockcount); @@ -336,6 +341,7 @@ xchk_bmap_rt_iextent_xref( break; } +out_free: xchk_rt_btcur_free(&info->sc->sr); xchk_rt_unlock(info->sc, &info->sc->sr); } diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c index 2256b77e20fc..ad6f00c4767f 100644 --- a/fs/xfs/scrub/bmap_repair.c +++ b/fs/xfs/scrub/bmap_repair.c @@ -339,7 +339,9 @@ xrep_bmap_scan_rt( if (xrep_is_rtmeta_ino(sc, sc->ip->i_ino)) return 0; - xchk_rt_lock(sc, &sc->sr); + error = xchk_rt_lock(sc, &sc->sr); + if (error) + return error; xrep_rt_btcur_init(sc, &sc->sr); error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_bmap_walk_rtrmap, rb); xchk_rt_btcur_free(&sc->sr); diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 246d33ac46b9..4987aebca74e 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -484,7 +484,35 @@ want_ag_read_header_failure( * * The headers should be released by xchk_ag_free, but as a fail safe we attach * all the buffers we grab 
to the scrub transaction so they'll all be freed - * when we cancel it. Returns ENOENT if we can't grab the perag structure. + * when we cancel it. + */ +static inline int +__xchk_ag_read_headers( + struct xfs_scrub *sc, + xfs_agnumber_t agno, + struct xchk_ag *sa) +{ + struct xfs_mount *mp = sc->mp; + int error; + + error = xfs_ialloc_read_agi(mp, sc->tp, agno, &sa->agi_bp); + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) + return error; + + error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &sa->agf_bp); + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) + return error; + + error = xfs_alloc_read_agfl(mp, sc->tp, agno, &sa->agfl_bp); + if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) + return error; + + return 0; +} + +/* + * Grab all the headers for an AG, and wait until there aren't any pending + * intents. Returns -ENOENT if we can't grab the perag structure. */ int xchk_ag_read_headers( @@ -502,29 +530,72 @@ xchk_ag_read_headers( return xchk_ag_lock(sc); } -/* Lock the AG headers. */ +/* Lock the AG headers, waiting for pending intents to drain. 
*/ int xchk_ag_lock( struct xfs_scrub *sc) { - struct xfs_mount *mp = sc->mp; struct xchk_ag *sa = &sc->sa; - xfs_agnumber_t agno = sa->pag->pag_agno; - int error; + int error = 0; - error = xfs_ialloc_read_agi(mp, sc->tp, agno, &sa->agi_bp); - if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) - return error; + ASSERT(sa->pag != NULL); + ASSERT(sa->agi_bp == NULL); + ASSERT(sa->agf_bp == NULL); + ASSERT(sa->agfl_bp == NULL); - error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &sa->agf_bp); - if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) - return error; + do { + if (xchk_should_terminate(sc, &error)) + return error; - error = xfs_alloc_read_agfl(mp, sc->tp, agno, &sa->agfl_bp); - if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) - return error; + error = __xchk_ag_read_headers(sc, sa->pag->pag_agno, sa); + if (error) + return error; - return 0; + /* + * Decide if this AG is quiet enough for all metadata to be + * consistent with each other. XFS allows the AG header buffer + * locks to cycle across transaction rolls while processing + * chains of deferred ops, which means that there could be + * other threads in the middle of processing a chain of + * deferred ops. For regular operations we are careful about + * ordering operations to prevent collisions between threads + * (which is why we don't need a per-AG lock), but scrub and + * repair have to serialize against chained operations. + * + * We just locked all the AG headers buffers; now take a look + * to see if there are any intents in progress. If there are, + * drop the AG headers and wait for the intents to drain. + * Since we hold all the AG header locks for the duration of + * the scrub, this is the only time we have to sample the + * intents counter; any threads increasing it after this point + * can't possibly be in the middle of a chain of AG metadata + * updates. + * + * Obviously, this should be slanted against scrub and in favor + * of runtime threads. 
+ */ + if (!xfs_drain_busy(&sa->pag->pag_intents)) + return 0; + + if (sa->agfl_bp) { + xfs_trans_brelse(sc->tp, sa->agfl_bp); + sa->agfl_bp = NULL; + } + + if (sa->agf_bp) { + xfs_trans_brelse(sc->tp, sa->agf_bp); + sa->agf_bp = NULL; + } + + if (sa->agi_bp) { + xfs_trans_brelse(sc->tp, sa->agi_bp); + sa->agi_bp = NULL; + } + + error = xfs_perag_drain_intents(sa->pag); + } while (!error); + + return error; } /* Release all the AG btree cursors. */ @@ -653,15 +724,63 @@ xchk_ag_init( return 0; } -/* Lock everything we need to work on realtime metadata. */ -void +#ifdef CONFIG_XFS_RT +/* Lock everything we need to work on realtime metadata and wait for intents. */ +int +xchk_rt_lock( + struct xfs_scrub *sc, + struct xchk_rt *sr) +{ + int error = 0; + + do { + if (xchk_should_terminate(sc, &error)) + return error; + + xfs_rtlock(NULL, sc->mp, XFS_RTLOCK_ALL); + + /* + * Decide if the RT volume is quiet enough for all metadata to + * be consistent with each other. Regular file IO doesn't get + * to lock all the rt inodes at the same time, which means that + * there could be other threads in the middle of processing a + * chain of deferred ops. + * + * We just locked all the rt inodes; now take a look to see if + * there are any rt intents in progress. If there are, drop + * the rt inode locks and wait for the intents to drain. Since + * we hold the rt inode locks for the duration of the scrub, + * this is the only time we have to sample the intents counter; + * any threads increasing it after this point can't possibly be + * in the middle of a chain of rt metadata updates. + * + * Obviously, this should be slanted against scrub and in favor + * of runtime threads. + */ + if (!xfs_drain_busy(&sc->mp->m_rt_intents)) { + sr->locked = true; + return 0; + } + + xfs_rtunlock(sc->mp, XFS_RTLOCK_ALL); + + error = xfs_rt_drain_intents(sc->mp); + } while (!error); + + return error; +} +#else +/* Lock everything we need to work on realtime metadata and wait for intents. 
*/ +int xchk_rt_lock( struct xfs_scrub *sc, struct xchk_rt *sr) { xfs_rtlock(NULL, sc->mp, XFS_RTLOCK_ALL); sr->locked = true; + return 0; } +#endif /* CONFIG_XFS_RT */ /* * For scrubbing a realtime file, grab all the in-core resources we'll need to @@ -669,14 +788,17 @@ xchk_rt_lock( * metadata inodes. Callers must not join these inodes to the transaction * with non-zero lockflags or concurrency problems will result. */ -void +int xchk_rt_init( struct xfs_scrub *sc, struct xchk_rt *sr) { struct xfs_mount *mp = sc->mp; + int error; - xchk_rt_lock(sc, sr); + error = xchk_rt_lock(sc, sr); + if (error) + return error; if (xfs_has_rtrmapbt(mp)) sr->rmap_cur = xfs_rtrmapbt_init_cursor(mp, sc->tp, @@ -685,6 +807,8 @@ xchk_rt_init( if (xfs_has_reflink(mp)) sr->refc_cur = xfs_rtrefcountbt_init_cursor(mp, sc->tp, mp->m_rrefcountip); + + return 0; } /* diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index f2e1cf719c06..36a0be71cfbf 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -155,9 +155,9 @@ xchk_ag_init_existing( return error == -ENOENT ? 
-EFSCORRUPTED : error; } -void xchk_rt_init(struct xfs_scrub *sc, struct xchk_rt *sr); +int xchk_rt_init(struct xfs_scrub *sc, struct xchk_rt *sr); void xchk_rt_btcur_free(struct xchk_rt *sr); -void xchk_rt_lock(struct xfs_scrub *sc, struct xchk_rt *sr); +int xchk_rt_lock(struct xfs_scrub *sc, struct xchk_rt *sr); void xchk_rt_unlock(struct xfs_scrub *sc, struct xchk_rt *sr); int xchk_ag_read_headers(struct xfs_scrub *sc, xfs_agnumber_t agno, struct xchk_ag *sa); diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 97605313f097..8eec28aa1a95 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -597,7 +597,9 @@ xrep_dinode_count_rt_rmaps( xrep_is_rtmeta_ino(sc, sc->sm->sm_ino)) return 0; - xchk_rt_lock(sc, &sc->sr); + error = xchk_rt_lock(sc, &sc->sr); + if (error) + return error; xrep_rt_btcur_init(sc, &sc->sr); error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_dinode_walk_rtrmap, dis); diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index c44283b0a502..8b4951ebe2cc 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -487,6 +487,7 @@ xrep_newbt_schedule_reap( INIT_LIST_HEAD(&efi_item.xefi_list); list_add(&efi_item.xefi_list, &items); + xfs_fs_bump_intents(xnr->sc->mp, false, resv->fsbno); resv->efi = xfs_extent_free_defer_type.create_intent(xnr->sc->tp, &items, 1, false); } @@ -708,6 +709,7 @@ xrep_newbt_destroy( goto junkit; list_del(&resv->list); + xfs_fs_drop_intents(sc->mp, false, resv->fsbno); kmem_free(resv); } @@ -720,6 +722,7 @@ junkit: list_for_each_entry_safe(resv, n, &xnr->resv_list, list) { xfs_extent_free_defer_type.abort_intent(resv->efi); list_del(&resv->list); + xfs_fs_drop_intents(sc->mp, false, resv->fsbno); kmem_free(resv); } diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 8f354d27e3ea..1f3e3354749d 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -46,8 +46,7 @@ xchk_setup_rtbitmap( if (error) return error; - xchk_rt_init(sc, 
&sc->sr); - return 0; + return xchk_rt_init(sc, &sc->sr); } /* Realtime bitmap. */ diff --git a/fs/xfs/scrub/rtrefcount.c b/fs/xfs/scrub/rtrefcount.c index 2a7d5268bd43..860b9feff951 100644 --- a/fs/xfs/scrub/rtrefcount.c +++ b/fs/xfs/scrub/rtrefcount.c @@ -33,8 +33,7 @@ xchk_setup_rtrefcountbt( if (error) return error; - xchk_rt_init(sc, &sc->sr); - return 0; + return xchk_rt_init(sc, &sc->sr); } /* Realtime Reference count btree scrubber. */ diff --git a/fs/xfs/scrub/rtrmap.c b/fs/xfs/scrub/rtrmap.c index acd37e63c4bd..25d56cb1b506 100644 --- a/fs/xfs/scrub/rtrmap.c +++ b/fs/xfs/scrub/rtrmap.c @@ -50,8 +50,7 @@ xchk_setup_rtrmapbt( if (error) return error; - xchk_rt_init(sc, &sc->sr); - return 0; + return xchk_rt_init(sc, &sc->sr); } /* Realtime reverse mapping. */ diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c index edcdc407f6cb..b9fd891f4476 100644 --- a/fs/xfs/scrub/rtrmap_repair.c +++ b/fs/xfs/scrub/rtrmap_repair.c @@ -583,7 +583,9 @@ xrep_rtrmap_find_rmaps( error = xchk_setup_fs(sc); if (error) return error; - xchk_rt_lock(sc, &sc->sr); + error = xchk_rt_lock(sc, &sc->sr); + if (error) + return error; /* Scan for old rtrmap blocks. */ for_each_perag(sc->mp, agno, pag) { diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index be5ec3e8bf12..f4e1a7e1466b 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -71,8 +71,7 @@ xchk_setup_rtsummary( if (error) return error; - xchk_rt_init(sc, &sc->sr); - return 0; + return xchk_rt_init(sc, &sc->sr); } /* Update the summary file to reflect the free extent that we've accumulated. 
*/ diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 4c5cb8072fa6..a4cb64b080f7 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -342,6 +342,21 @@ xfs_bmap_update_create_done( return &xfs_trans_get_bud(tp, BUI_ITEM(intent))->bud_item; } +static inline void +xfs_bmap_drop_intents( + struct xfs_mount *mp, + const struct xfs_bmap_intent *bi, + xfs_fsblock_t orig_startblock) +{ + bool rt; + + if (!xfs_has_rmapbt(mp)) + return; + + rt = xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork); + xfs_fs_drop_intents(mp, rt, orig_startblock); +} + /* Process a deferred rmap update. */ STATIC int xfs_bmap_update_finish_item( @@ -351,14 +366,25 @@ xfs_bmap_update_finish_item( struct xfs_btree_cur **state) { struct xfs_bmap_intent *bi; + struct xfs_mount *mp = tp->t_mountp; + xfs_fsblock_t orig_startblock; int error; bi = container_of(item, struct xfs_bmap_intent, bi_list); + orig_startblock = bi->bi_bmap.br_startblock; error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), bi); if (!error && bi->bi_bmap.br_blockcount > 0) { ASSERT(bi->bi_type == XFS_BMAP_UNMAP); return -EAGAIN; } + + /* + * Drop our intent counter reference now that we've either queued a + * deferred rmap intent or failed. Be careful to use the original + * startblock since the finishing functions can update the intent + * state. + */ + xfs_bmap_drop_intents(mp, bi, orig_startblock); kmem_cache_free(xfs_bmap_intent_cache, bi); return error; } @@ -371,17 +397,41 @@ xfs_bmap_update_abort_intent( xfs_bui_release(BUI_ITEM(intent)); } -/* Cancel a deferred rmap update. */ +/* Cancel a deferred bmap update. */ STATIC void xfs_bmap_update_cancel_item( + struct xfs_mount *mp, struct list_head *item) { struct xfs_bmap_intent *bi; bi = container_of(item, struct xfs_bmap_intent, bi_list); + xfs_bmap_drop_intents(mp, bi, bi->bi_bmap.br_startblock); kmem_cache_free(xfs_bmap_intent_cache, bi); } +/* Add a deferred bmap update. 
*/ +STATIC void +xfs_bmap_update_add_item( + struct xfs_mount *mp, + const struct list_head *item) +{ + const struct xfs_bmap_intent *bi; + bool rt; + + bi = container_of(item, struct xfs_bmap_intent, bi_list); + + /* + * Grab an intent counter reference on behalf of the deferred rmap + * intent item that we will queue when we finish this bmap work. + */ + if (!xfs_has_rmapbt(mp)) + return; + + rt = xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork); + xfs_fs_bump_intents(mp, rt, bi->bi_bmap.br_startblock); +} + const struct xfs_defer_op_type xfs_bmap_update_defer_type = { .max_items = XFS_BUI_MAX_FAST_EXTENTS, .create_intent = xfs_bmap_update_create_intent, @@ -389,6 +439,7 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = { .create_done = xfs_bmap_update_create_done, .finish_item = xfs_bmap_update_finish_item, .cancel_item = xfs_bmap_update_cancel_item, + .add_item = xfs_bmap_update_add_item, }; /* Is this recovered BUI ok? */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 34de89077961..5345eb8efe30 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -489,6 +489,15 @@ xfs_extent_free_create_done( return &xfs_trans_get_efd(tp, EFI_ITEM(intent), count)->efd_item; } +static inline void +xfs_extent_free_drop_intents( + struct xfs_mount *mp, + const struct xfs_extent_free_item *xefi) +{ + xfs_fs_drop_intents(mp, xefi->xefi_flags & XFS_EFI_REALTIME, + xefi->xefi_startblock); +} + /* Process a free extent. 
*/ STATIC int xfs_extent_free_finish_item( @@ -497,6 +506,7 @@ xfs_extent_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { + struct xfs_mount *mp = tp->t_mountp; struct xfs_extent_free_item *xefi; int error; @@ -512,6 +522,7 @@ xfs_extent_free_finish_item( } error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi); + xfs_extent_free_drop_intents(mp, xefi); kmem_cache_free(xfs_extfree_item_cache, xefi); return error; } @@ -527,14 +538,31 @@ xfs_extent_free_abort_intent( /* Cancel a free extent. */ STATIC void xfs_extent_free_cancel_item( + struct xfs_mount *mp, struct list_head *item) { struct xfs_extent_free_item *xefi; xefi = container_of(item, struct xfs_extent_free_item, xefi_list); + xfs_extent_free_drop_intents(mp, xefi); kmem_cache_free(xfs_extfree_item_cache, xefi); } +/* Add a deferred free extent. */ +STATIC void +xfs_extent_free_add_item( + struct xfs_mount *mp, + const struct list_head *item) +{ + const struct xfs_extent_free_item *xefi; + + xefi = container_of(item, struct xfs_extent_free_item, xefi_list); + + /* Grab an intent counter reference for this intent item. 
*/ + xfs_fs_bump_intents(mp, xefi->xefi_flags & XFS_EFI_REALTIME, + xefi->xefi_startblock); +} + const struct xfs_defer_op_type xfs_extent_free_defer_type = { .max_items = XFS_EFI_MAX_FAST_EXTENTS, .create_intent = xfs_extent_free_create_intent, @@ -542,6 +570,7 @@ const struct xfs_defer_op_type xfs_extent_free_defer_type = { .create_done = xfs_extent_free_create_done, .finish_item = xfs_extent_free_finish_item, .cancel_item = xfs_extent_free_cancel_item, + .add_item = xfs_extent_free_add_item, }; /* @@ -596,6 +625,7 @@ xfs_agfl_free_finish_item( extp->ext_len = xefi->xefi_blockcount; efdp->efd_next_extent++; + xfs_extent_free_drop_intents(mp, xefi); kmem_cache_free(xfs_extfree_item_cache, xefi); return error; } @@ -608,6 +638,7 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = { .create_done = xfs_extent_free_create_done, .finish_item = xfs_agfl_free_finish_item, .cancel_item = xfs_extent_free_cancel_item, + .add_item = xfs_extent_free_add_item, }; /* Is this recovered EFI ok? */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 8c4975556a73..fb38e638e1ca 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1510,3 +1510,97 @@ xfs_hook_call( return srcu_notifier_call_chain(&chain->head, val, priv); } #endif /* CONFIG_XFS_LIVE_HOOKS */ + +#ifdef CONFIG_XFS_DRAIN_INTENTS +static inline void xfs_drain_bump(struct xfs_drain *dr) +{ + atomic_inc(&dr->dr_count); +} + +static inline void xfs_drain_drop(struct xfs_drain *dr) +{ + ASSERT(atomic_read(&dr->dr_count) > 0); + + if (atomic_dec_and_test(&dr->dr_count)) + wake_up(&dr->dr_waiters); +} + +static inline int xfs_drain_wait(struct xfs_drain *dr) +{ + return wait_event_killable(dr->dr_waiters, + atomic_read(&dr->dr_count) == 0); +} + +#ifdef CONFIG_XFS_RT +# define xfs_rt_drain_bump(dr) xfs_drain_bump(dr) +# define xfs_rt_drain_drop(dr) xfs_drain_drop(dr) + +/* + * Wait for the pending intent count for realtime metadata to hit zero. + * Callers must not hold any rt metadata inode locks. 
+ */ +int +xfs_rt_drain_intents( + struct xfs_mount *mp) +{ + trace_xfs_rt_wait_intents(mp, __return_address); + return xfs_drain_wait(&mp->m_rt_intents); +} +#else +# define trace_xfs_rt_bump_intents(...) +# define trace_xfs_rt_drop_intents(...) +# define xfs_rt_drain_bump(dr) +# define xfs_rt_drain_drop(dr) +#endif /* CONFIG_XFS_RT */ + +/* Add an item to the pending count. */ +void +xfs_fs_bump_intents( + struct xfs_mount *mp, + bool isrt, + xfs_fsblock_t fsb) +{ + struct xfs_perag *pag; + + if (isrt) { + trace_xfs_rt_bump_intents(mp, __return_address); + xfs_rt_drain_bump(&mp->m_rt_intents); + return; + } + + pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, fsb)); + trace_xfs_perag_bump_intents(pag, __return_address); + xfs_drain_bump(&pag->pag_intents); + xfs_perag_put(pag); +} + +/* Remove an item from the pending count. */ +void +xfs_fs_drop_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb) +{ + struct xfs_perag *pag; + + if (isrt) { + trace_xfs_rt_drop_intents(mp, __return_address); + xfs_rt_drain_drop(&mp->m_rt_intents); + return; + } + + pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, fsb)); + trace_xfs_perag_drop_intents(pag, __return_address); + xfs_drain_drop(&pag->pag_intents); + xfs_perag_put(pag); +} + +/* + * Wait for the pending intent count for AG metadata to hit zero. + * Callers must not hold any AG header buffers. + */ +int +xfs_perag_drain_intents( + struct xfs_perag *pag) +{ + trace_xfs_perag_wait_intents(pag, __return_address); + return xfs_drain_wait(&pag->pag_intents); +} +#endif /* CONFIG_XFS_DRAIN_INTENTS */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 5943f8f100a7..79fd3293ca36 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -74,6 +74,63 @@ int xfs_hook_call(struct xfs_hook_chain *chain, unsigned long val, void *priv); #endif /* + * Passive drain mechanism. This data structure tracks a count of some items + * and contains a waitqueue for callers who would like to wake up when the + * count hits zero. 
+ */ +struct xfs_drain { +#ifdef CONFIG_XFS_DRAIN_INTENTS + /* Number of items pending in some part of the filesystem. */ + atomic_t dr_count; + + /* Queue to wait for dr_count to go to zero */ + struct wait_queue_head dr_waiters; +#endif /* CONFIG_XFS_DRAIN_INTENTS */ +}; + +#ifdef CONFIG_XFS_DRAIN_INTENTS +# ifdef CONFIG_XFS_RT +int xfs_rt_drain_intents(struct xfs_mount *mp); +# else +# define xfs_rt_wait_intents(mp) (-ENOSYS) +# endif /* CONFIG_XFS_RT */ + +int xfs_perag_drain_intents(struct xfs_perag *pag); + +void xfs_fs_bump_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb); +void xfs_fs_drop_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb); + +/* Are there work items pending? */ +static inline bool xfs_drain_busy(struct xfs_drain *dr) +{ + return atomic_read(&dr->dr_count) > 0; +} + +static inline void xfs_drain_init(struct xfs_drain *dr) +{ + atomic_set(&dr->dr_count, 0); + init_waitqueue_head(&dr->dr_waiters); +} + +static inline void xfs_drain_free(struct xfs_drain *dr) +{ + ASSERT(atomic_read(&dr->dr_count) == 0); +} +#else +static inline void +xfs_fs_bump_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb) +{ +} + +static inline void +xfs_fs_drop_intents(struct xfs_mount *mp, bool isrt, xfs_fsblock_t fsb) +{ +} +# define xfs_drain_init(dr) +# define xfs_drain_free(dr) +#endif /* CONFIG_XFS_DRAIN_INTENTS */ + +/* * Per-cpu deferred inode inactivation GC lists. */ struct xfs_inodegc { @@ -276,6 +333,17 @@ typedef struct xfs_mount { /* online file link count check stuff */ struct xfs_hook_chain m_nlink_delta_hooks; + +#ifdef CONFIG_XFS_RT + /* + * We use xfs_drain to track the number of deferred log intent items + * that have been queued (but not yet processed) so that waiters (e.g. + * scrub) will not lock resources when other threads are in the middle + * of processing a chain of intent items only to find momentary + * inconsistencies. 
+ */ + struct xfs_drain m_rt_intents; +#endif /* CONFIG_XFS_RT */ } xfs_mount_t; #define M_IGEO(mp) (&(mp)->m_ino_geo) diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 8d481cc5ad6a..f4daf76c82d3 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -364,9 +364,12 @@ xfs_refcount_update_finish_item( struct xfs_btree_cur **state) { struct xfs_refcount_intent *ri; + struct xfs_mount *mp = tp->t_mountp; + xfs_fsblock_t orig_startblock; int error; ri = container_of(item, struct xfs_refcount_intent, ri_list); + orig_startblock = ri->ri_startblock; error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done), ri, state); @@ -376,6 +379,13 @@ xfs_refcount_update_finish_item( ri->ri_type == XFS_REFCOUNT_DECREASE); return -EAGAIN; } + + /* + * Drop our intent counter reference now that we've finished all the + * work or failed. Be careful to use the original startblock because + * the finishing functions can update the intent state. + */ + xfs_fs_drop_intents(mp, ri->ri_realtime, orig_startblock); kmem_cache_free(xfs_refcount_intent_cache, ri); return error; } @@ -391,14 +401,28 @@ xfs_refcount_update_abort_intent( /* Cancel a deferred refcount update. */ STATIC void xfs_refcount_update_cancel_item( + struct xfs_mount *mp, struct list_head *item) { struct xfs_refcount_intent *ri; ri = container_of(item, struct xfs_refcount_intent, ri_list); + xfs_fs_drop_intents(mp, ri->ri_realtime, ri->ri_startblock); kmem_cache_free(xfs_refcount_intent_cache, ri); } +/* Add a deferred refcount update. 
*/ +STATIC void +xfs_refcount_update_add_item( + struct xfs_mount *mp, + const struct list_head *item) +{ + const struct xfs_refcount_intent *ri; + + ri = container_of(item, struct xfs_refcount_intent, ri_list); + xfs_fs_bump_intents(mp, ri->ri_realtime, ri->ri_startblock); +} + const struct xfs_defer_op_type xfs_refcount_update_defer_type = { .max_items = XFS_CUI_MAX_FAST_EXTENTS, .create_intent = xfs_refcount_update_create_intent, @@ -407,6 +431,7 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = { .finish_item = xfs_refcount_update_finish_item, .finish_cleanup = xfs_refcount_finish_one_cleanup, .cancel_item = xfs_refcount_update_cancel_item, + .add_item = xfs_refcount_update_add_item, }; /* Is this recovered CUI ok? */ diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index bf2e7d37668f..0d660d000fe7 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -411,11 +411,13 @@ xfs_rmap_update_finish_item( struct xfs_btree_cur **state) { struct xfs_rmap_intent *ri; + struct xfs_mount *mp = tp->t_mountp; int error; ri = container_of(item, struct xfs_rmap_intent, ri_list); error = xfs_trans_log_finish_rmap_update(tp, RUD_ITEM(done), ri, state); + xfs_fs_drop_intents(mp, ri->ri_realtime, ri->ri_bmap.br_startblock); kmem_cache_free(xfs_rmap_intent_cache, ri); return error; } @@ -431,14 +433,28 @@ xfs_rmap_update_abort_intent( /* Cancel a deferred rmap update. */ STATIC void xfs_rmap_update_cancel_item( + struct xfs_mount *mp, struct list_head *item) { struct xfs_rmap_intent *ri; ri = container_of(item, struct xfs_rmap_intent, ri_list); + xfs_fs_drop_intents(mp, ri->ri_realtime, ri->ri_bmap.br_startblock); kmem_cache_free(xfs_rmap_intent_cache, ri); } +/* Add a deferred rmap update. 
*/ +STATIC void +xfs_rmap_update_add_item( + struct xfs_mount *mp, + const struct list_head *item) +{ + const struct xfs_rmap_intent *ri; + + ri = container_of(item, struct xfs_rmap_intent, ri_list); + xfs_fs_bump_intents(mp, ri->ri_realtime, ri->ri_bmap.br_startblock); +} + const struct xfs_defer_op_type xfs_rmap_update_defer_type = { .max_items = XFS_RUI_MAX_FAST_EXTENTS, .create_intent = xfs_rmap_update_create_intent, @@ -447,6 +463,7 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = { .finish_item = xfs_rmap_update_finish_item, .finish_cleanup = xfs_rmap_finish_one_cleanup, .cancel_item = xfs_rmap_update_cancel_item, + .add_item = xfs_rmap_update_add_item, }; /* Is this recovered RUI ok? */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 879e56cd8498..224485863c54 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -726,6 +726,9 @@ xfs_mount_free( ASSERT(!mutex_is_locked(&mp->m_scrub_freeze)); mutex_destroy(&mp->m_scrub_freeze); +#ifdef CONFIG_XFS_RT + xfs_drain_free(&mp->m_rt_intents); +#endif kmem_free(mp); } @@ -1968,6 +1971,10 @@ static int xfs_init_fs_context( if (!mp) return -ENOMEM; +#ifdef CONFIG_XFS_RT + xfs_drain_init(&mp->m_rt_intents); +#endif + spin_lock_init(&mp->m_sb_lock); spin_lock_init(&mp->m_agirotor_lock); INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); diff --git a/fs/xfs/xfs_swapext_item.c b/fs/xfs/xfs_swapext_item.c index f51e83e33027..9e084f9996c1 100644 --- a/fs/xfs/xfs_swapext_item.c +++ b/fs/xfs/xfs_swapext_item.c @@ -346,6 +346,7 @@ xfs_swapext_abort_intent( /* Cancel a deferred swapext update. */ STATIC void xfs_swapext_cancel_item( + struct xfs_mount *mp, struct list_head *item) { struct xfs_swapext_intent *sxi; @@ -354,6 +355,14 @@ xfs_swapext_cancel_item( kmem_free(sxi); } +/* Add a deferred swapext update. 
*/ +STATIC void +xfs_swapext_add_item( + struct xfs_mount *mp, + const struct list_head *item) +{ +} + const struct xfs_defer_op_type xfs_swapext_defer_type = { .max_items = XFS_SXI_MAX_FAST_EXTENTS, .create_intent = xfs_swapext_create_intent, @@ -361,6 +370,7 @@ const struct xfs_defer_op_type xfs_swapext_defer_type = { .create_done = xfs_swapext_create_done, .finish_item = xfs_swapext_finish_item, .cancel_item = xfs_swapext_cancel_item, + .add_item = xfs_swapext_add_item, }; /* Is this recovered SXI ok? */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 3c1e61e57805..fed0ede02820 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2767,6 +2767,44 @@ DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_deferred); DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_defer); DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_deferred); +DECLARE_EVENT_CLASS(xfs_defer_pending_item_class, + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, + void *item), + TP_ARGS(mp, dfp, item), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(void *, intent) + __field(void *, item) + __field(char, committed) + __field(int, nr) + ), + TP_fast_assign( + __entry->dev = mp ? 
mp->m_super->s_dev : 0; + __entry->type = dfp->dfp_type; + __entry->intent = dfp->dfp_intent; + __entry->item = item; + __entry->committed = dfp->dfp_done != NULL; + __entry->nr = dfp->dfp_count; + ), + TP_printk("dev %d:%d optype %d intent %p item %p committed %d nr %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->intent, + __entry->item, + __entry->committed, + __entry->nr) +) +#define DEFINE_DEFER_PENDING_ITEM_EVENT(name) \ +DEFINE_EVENT(xfs_defer_pending_item_class, name, \ + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, \ + void *item), \ + TP_ARGS(mp, dfp, item)) + +DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_add_item); +DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_cancel_item); +DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_finish_item); + /* rmap tracepoints */ DECLARE_EVENT_CLASS(xfs_rmap_class, TP_PROTO(struct xfs_btree_cur *cur, @@ -4931,6 +4969,70 @@ TRACE_EVENT(xfs_growfs_check_rtgeom, __entry->min_logfsbs) ); +#ifdef CONFIG_XFS_DRAIN_INTENTS +DECLARE_EVENT_CLASS(xfs_perag_intents_class, + TP_PROTO(struct xfs_perag *pag, void *caller_ip), + TP_ARGS(pag, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(long, nr_intents) + __field(void *, caller_ip) + ), + TP_fast_assign( + __entry->dev = pag->pag_mount->m_super->s_dev; + __entry->agno = pag->pag_agno; + __entry->nr_intents = atomic_read(&pag->pag_intents.dr_count); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno 0x%x intents %ld caller %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->nr_intents, + __entry->caller_ip) +); + +#define DEFINE_PERAG_INTENTS_EVENT(name) \ +DEFINE_EVENT(xfs_perag_intents_class, name, \ + TP_PROTO(struct xfs_perag *pag, void *caller_ip), \ + TP_ARGS(pag, caller_ip)) +DEFINE_PERAG_INTENTS_EVENT(xfs_perag_bump_intents); +DEFINE_PERAG_INTENTS_EVENT(xfs_perag_drop_intents); +DEFINE_PERAG_INTENTS_EVENT(xfs_perag_wait_intents); + +# ifdef CONFIG_XFS_RT 
+DECLARE_EVENT_CLASS(xfs_rt_intents_class, + TP_PROTO(struct xfs_mount *mp, void *caller_ip), + TP_ARGS(mp, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, rtdev) + __field(long, nr_intents) + __field(void *, caller_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->rtdev = mp->m_rtdev_targp->bt_dev; + __entry->nr_intents = atomic_read(&mp->m_rt_intents.dr_count); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d rtdev %d:%d intents %ld caller %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->rtdev), MINOR(__entry->rtdev), + __entry->nr_intents, + __entry->caller_ip) +); + +#define DEFINE_RT_INTENTS_EVENT(name) \ +DEFINE_EVENT(xfs_rt_intents_class, name, \ + TP_PROTO(struct xfs_mount *mp, void *caller_ip), \ + TP_ARGS(mp, caller_ip)) +DEFINE_RT_INTENTS_EVENT(xfs_rt_bump_intents); +DEFINE_RT_INTENTS_EVENT(xfs_rt_drop_intents); +DEFINE_RT_INTENTS_EVENT(xfs_rt_wait_intents); +# endif /* CONFIG_XFS_RT */ +#endif /* CONFIG_XFS_DRAIN_INTENTS */ + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH |