diff options
Diffstat (limited to 'fs/xfs/scrub/reap.c')
-rw-r--r-- | fs/xfs/scrub/reap.c | 388 |
1 files changed, 312 insertions, 76 deletions
Reviewer note (text before the first "diff --git" line is ignored by patch tools):
line structure restored from a whitespace-collapsed rendering.  Relative to the
patch as posted, the buffer probe loop in xreap_agextent_binval() now uses an
inclusive bound (fsbcount <= max_fsbs) so single-block extents are invalidated,
and two comment typos are fixed.  The post-image blob id on the index line
therefore no longer matches the resulting file.

diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c
index df13a9e0fe86..1a0d24295e5f 100644
--- a/fs/xfs/scrub/reap.c
+++ b/fs/xfs/scrub/reap.c
@@ -27,6 +27,10 @@
 #include "xfs_quota.h"
 #include "xfs_qm.h"
 #include "xfs_bmap.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_attr_remote.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
@@ -76,20 +80,29 @@
  */
 
 /* Information about reaping extents after a repair. */
-struct xrep_reap_state {
+struct xreap_state {
 	struct xfs_scrub		*sc;
 
 	/* Reverse mapping owner and metadata reservation type. */
 	const struct xfs_owner_info	*oinfo;
 	enum xfs_ag_resv_type		resv;
 
+	/* If true, roll the transaction before reaping the next extent. */
+	bool				force_roll;
+
 	/* Number of deferred reaps attached to the current transaction. */
 	unsigned int			deferred;
+
+	/* Number of invalidated buffers logged to the current transaction. */
+	unsigned int			invalidated;
+
+	/* Number of deferred reaps queued during the whole reap sequence. */
+	unsigned long long		total_deferred;
 };
 
 /* Put a block back on the AGFL. */
 STATIC int
-xrep_put_freelist(
+xreap_put_freelist(
 	struct xfs_scrub	*sc,
 	xfs_agblock_t		agbno)
 {
@@ -126,69 +139,226 @@ xrep_put_freelist(
 	return 0;
 }
 
-/* Try to invalidate the incore buffer for a block that we're about to free. */
+/* Are there any uncommitted reap operations? */
+static inline bool xreap_dirty(const struct xreap_state *rs)
+{
+	if (rs->force_roll)
+		return true;
+	if (rs->deferred)
+		return true;
+	if (rs->invalidated)
+		return true;
+	if (rs->total_deferred)
+		return true;
+	return false;
+}
+
+#define XREAP_MAX_BINVAL	(2048)
+
+/*
+ * Decide if we want to roll the transaction after reaping an extent.  We don't
+ * want to overrun the transaction reservation, so we prohibit more than
+ * 128 EFIs per transaction.  For the same reason, we limit the number
+ * of buffer invalidations to 2048.
+ */
+static inline bool xreap_want_roll(const struct xreap_state *rs)
+{
+	if (rs->force_roll)
+		return true;
+	if (rs->deferred > XREP_MAX_ITRUNCATE_EFIS)
+		return true;
+	if (rs->invalidated > XREAP_MAX_BINVAL)
+		return true;
+	return false;
+}
+
+static inline void xreap_reset(struct xreap_state *rs)
+{
+	rs->total_deferred += rs->deferred;
+	rs->deferred = 0;
+	rs->invalidated = 0;
+	rs->force_roll = false;
+}
+
+#define XREAP_MAX_DEFER_CHAIN	(2048)
+
+/*
+ * Decide if we want to finish the deferred ops that are attached to the scrub
+ * transaction.  We don't want to queue huge chains of deferred ops because
+ * that can consume a lot of log space and kernel memory.  Hence we trigger an
+ * xfs_defer_finish if there are more than 2048 deferred reap operations or the
+ * caller did some real work.
+ */
+static inline bool
+xreap_want_defer_finish(const struct xreap_state *rs)
+{
+	if (rs->force_roll)
+		return true;
+	if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
+		return true;
+	return false;
+}
+
+static inline void xreap_defer_finish_reset(struct xreap_state *rs)
+{
+	rs->total_deferred = 0;
+	rs->deferred = 0;
+	rs->invalidated = 0;
+	rs->force_roll = false;
+}
+
+/* Try to invalidate the incore buffers for an extent that we're freeing. */
 STATIC void
-xrep_block_reap_binval(
-	struct xfs_scrub	*sc,
-	xfs_fsblock_t		fsbno)
+xreap_agextent_binval(
+	struct xreap_state	*rs,
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		*aglenp)
 {
-	struct xfs_buf		*bp = NULL;
-	int			error;
+	struct xfs_scrub	*sc = rs->sc;
+	struct xfs_perag	*pag = sc->sa.pag;
+	struct xfs_mount	*mp = sc->mp;
+	xfs_agnumber_t		agno = sc->sa.pag->pag_agno;
+	xfs_agblock_t		agbno_next = agbno + *aglenp;
+	xfs_agblock_t		bno = agbno;
 
 	/*
-	 * If there's an incore buffer for exactly this block, invalidate it.
 	 * Avoid invalidating AG headers and post-EOFS blocks because we never
 	 * own those.
 	 */
-	if (!xfs_verify_fsbno(sc->mp, fsbno))
+	if (!xfs_verify_agbno(pag, agbno) ||
+	    !xfs_verify_agbno(pag, agbno_next - 1))
 		return;
 
 	/*
-	 * We assume that the lack of any other known owners means that the
-	 * buffer can be locked without risk of deadlocking.
+	 * If there are incore buffers for these blocks, invalidate them.  We
+	 * assume that the lack of any other known owners means that the buffer
+	 * can be locked without risk of deadlocking.  The buffer cache cannot
+	 * detect aliasing, so employ nested loops to scan for incore buffers
+	 * of any plausible size.
 	 */
-	error = xfs_buf_incore(sc->mp->m_ddev_targp,
-			XFS_FSB_TO_DADDR(sc->mp, fsbno),
-			XFS_FSB_TO_BB(sc->mp, 1), XBF_LIVESCAN, &bp);
-	if (error)
-		return;
+	while (bno < agbno_next) {
+		xfs_agblock_t	fsbcount;
+		xfs_agblock_t	max_fsbs;
+
+		/*
+		 * Max buffer size is the max remote xattr buffer size, which
+		 * is one fs block larger than 64k.  The bound is inclusive.
+		 */
+		max_fsbs = min_t(xfs_agblock_t, agbno_next - bno,
+				xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX));
+
+		for (fsbcount = 1; fsbcount <= max_fsbs; fsbcount++) {
+			struct xfs_buf	*bp = NULL;
+			xfs_daddr_t	daddr;
+			int		error;
+
+			daddr = XFS_AGB_TO_DADDR(mp, agno, bno);
+			error = xfs_buf_incore(mp->m_ddev_targp, daddr,
+					XFS_FSB_TO_BB(mp, fsbcount),
+					XBF_LIVESCAN, &bp);
+			if (error)
+				continue;
+
+			xfs_trans_bjoin(sc->tp, bp);
+			xfs_trans_binval(sc->tp, bp);
+			rs->invalidated++;
+
+			/*
+			 * Stop invalidating if we've hit the limit; we should
+			 * still have enough reservation left to free however
+			 * far we've gotten.
+			 */
+			if (rs->invalidated > XREAP_MAX_BINVAL) {
+				*aglenp -= agbno_next - bno;
+				goto out;
+			}
+		}
+
+		bno++;
+	}
 
-	xfs_trans_bjoin(sc->tp, bp);
-	xfs_trans_binval(sc->tp, bp);
+out:
+	trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
 }
 
-/* Dispose of a single block. */
+/*
+ * Figure out the longest run of blocks that we can dispose of with a single
+ * call.  Cross-linked blocks should have their reverse mappings removed, but
+ * single-owner extents can be freed.  AGFL blocks can only be put back one at
+ * a time.
+ */
 STATIC int
-xrep_reap_block(
-	uint64_t			fsbno,
-	void				*priv)
+xreap_agextent_select(
+	struct xreap_state	*rs,
+	xfs_agblock_t		agbno,
+	xfs_agblock_t		agbno_next,
+	bool			*crosslinked,
+	xfs_extlen_t		*aglenp)
 {
-	struct xrep_reap_state		*rs = priv;
-	struct xfs_scrub		*sc = rs->sc;
-	struct xfs_btree_cur		*cur;
-	xfs_agnumber_t			agno;
-	xfs_agblock_t			agbno;
-	bool				has_other_rmap;
-	bool				need_roll = true;
-	int				error;
+	struct xfs_scrub	*sc = rs->sc;
+	struct xfs_btree_cur	*cur;
+	xfs_agblock_t		bno = agbno + 1;
+	xfs_extlen_t		len = 1;
+	int			error;
 
-	agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
-	agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
+	/*
+	 * Determine if there are any other rmap records covering the first
+	 * block of this extent.  If so, the block is crosslinked.
+	 */
+	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
+			sc->sa.pag);
+	error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
+			crosslinked);
+	if (error)
+		goto out_cur;
 
-	/* We don't support reaping file extents yet. */
-	if (sc->ip != NULL || sc->sa.pag->pag_agno != agno) {
-		ASSERT(0);
-		return -EFSCORRUPTED;
-	}
+	/* AGFL blocks can only be dealt with one at a time. */
+	if (rs->resv == XFS_AG_RESV_AGFL)
+		goto out_found;
 
-	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp, sc->sa.pag);
+	/*
+	 * Figure out how many of the subsequent blocks have the same crosslink
+	 * status.
+	 */
+	while (bno < agbno_next) {
+		bool		also_crosslinked;
 
-	/* Can we find any other rmappings? */
-	error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
-			&has_other_rmap);
+		error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
+				&also_crosslinked);
+		if (error)
+			goto out_cur;
+
+		if (*crosslinked != also_crosslinked)
+			break;
+
+		len++;
+		bno++;
+	}
+
+out_found:
+	*aglenp = len;
+	trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
+out_cur:
 	xfs_btree_del_cursor(cur, error);
-	if (error)
-		return error;
+	return error;
+}
+
+/*
+ * Dispose of as much of the beginning of this AG extent as possible.  The
+ * number of blocks disposed of will be returned in @aglenp.
+ */
+STATIC int
+xreap_agextent_iter(
+	struct xreap_state	*rs,
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		*aglenp,
+	bool			crosslinked)
+{
+	struct xfs_scrub	*sc = rs->sc;
+	xfs_fsblock_t		fsbno;
+	int			error = 0;
+
+	fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);
 
 	/*
 	 * If there are other rmappings, this block is cross linked and must
@@ -203,55 +373,117 @@ xrep_reap_block(
 	 * blow on writeout, the filesystem will shut down, and the admin gets
 	 * to run xfs_repair.
 	 */
-	if (has_other_rmap) {
-		trace_xrep_dispose_unmap_extent(sc->sa.pag, agbno, 1);
+	if (crosslinked) {
+		trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);
+
+		rs->force_roll = true;
+		return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
+				*aglenp, rs->oinfo);
+	}
+
+	trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);
 
-		error = xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
-				1, rs->oinfo);
+	/*
+	 * Invalidate as many buffers as we can, starting at agbno.  If this
+	 * function sets *aglenp to zero, the transaction is full of logged
+	 * buffer invalidations, so we need to return early so that we can
+	 * roll and retry.
+	 */
+	xreap_agextent_binval(rs, agbno, aglenp);
+	if (*aglenp == 0) {
+		ASSERT(xreap_want_roll(rs));
+		return 0;
+	}
+
+	/* Put blocks back on the AGFL one at a time. */
+	if (rs->resv == XFS_AG_RESV_AGFL) {
+		ASSERT(*aglenp == 1);
+		error = xreap_put_freelist(sc, agbno);
 		if (error)
 			return error;
 
-		goto roll_out;
+		rs->force_roll = true;
+		return 0;
+	}
+
+	/*
+	 * Use deferred frees to get rid of the old btree blocks to try to
+	 * minimize the window in which we could crash and lose the old blocks.
+	 */
+	error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
+			rs->resv, true);
+	if (error)
+		return error;
+
+	rs->deferred++;
+	return 0;
+}
+
+/*
+ * Break an AG metadata extent into sub-extents by fate (crosslinked, not
+ * crosslinked), and dispose of each sub-extent separately.
+ */
+STATIC int
+xreap_agmeta_extent(
+	uint64_t		fsbno,
+	uint64_t		len,
+	void			*priv)
+{
+	struct xreap_state	*rs = priv;
+	struct xfs_scrub	*sc = rs->sc;
+	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
+	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
+	xfs_agblock_t		agbno_next = agbno + len;
+	int			error = 0;
+
+	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
+	ASSERT(sc->ip == NULL);
+
+	if (agno != sc->sa.pag->pag_agno) {
+		ASSERT(sc->sa.pag->pag_agno == agno);
+		return -EFSCORRUPTED;
 	}
 
-	trace_xrep_dispose_free_extent(sc->sa.pag, agbno, 1);
+	while (agbno < agbno_next) {
+		xfs_extlen_t	aglen;
+		bool		crosslinked;
 
-	xrep_block_reap_binval(sc, fsbno);
+		error = xreap_agextent_select(rs, agbno, agbno_next,
+				&crosslinked, &aglen);
+		if (error)
+			return error;
 
-	if (rs->resv == XFS_AG_RESV_AGFL) {
-		error = xrep_put_freelist(sc, agbno);
-	} else {
-		/*
-		 * Use deferred frees to get rid of the old btree blocks to try
-		 * to minimize the window in which we could crash and lose the
-		 * old blocks.  However, we still need to roll the transaction
-		 * every 100 or so EFIs so that we don't exceed the log
-		 * reservation.
-		 */
-		error = __xfs_free_extent_later(sc->tp, fsbno, 1, rs->oinfo,
-				rs->resv, true);
+		error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
 		if (error)
 			return error;
-		rs->deferred++;
-		need_roll = rs->deferred > 100;
+
+		if (xreap_want_defer_finish(rs)) {
+			error = xrep_defer_finish(sc);
+			if (error)
+				return error;
+			xreap_defer_finish_reset(rs);
+		} else if (xreap_want_roll(rs)) {
+			error = xrep_roll_ag_trans(sc);
+			if (error)
+				return error;
+			xreap_reset(rs);
+		}
+
+		agbno += aglen;
 	}
-	if (error || !need_roll)
-		return error;
 
-roll_out:
-	rs->deferred = 0;
-	return xrep_roll_ag_trans(sc);
+	return 0;
 }
 
-/* Dispose of every block of every extent in the bitmap. */
+/* Dispose of every block of every AG metadata extent in the bitmap. */
 int
-xrep_reap_extents(
+xrep_reap_ag_metadata(
 	struct xfs_scrub		*sc,
 	struct xbitmap			*bitmap,
 	const struct xfs_owner_info	*oinfo,
 	enum xfs_ag_resv_type		type)
 {
-	struct xrep_reap_state		rs = {
+	struct xreap_state		rs = {
 		.sc			= sc,
 		.oinfo			= oinfo,
 		.resv			= type,
@@ -259,10 +491,14 @@ xrep_reap_extents(
 	int				error;
 
 	ASSERT(xfs_has_rmapbt(sc->mp));
+	ASSERT(sc->ip == NULL);
 
-	error = xbitmap_walk_bits(bitmap, xrep_reap_block, &rs);
-	if (error || rs.deferred == 0)
+	error = xbitmap_walk(bitmap, xreap_agmeta_extent, &rs);
+	if (error)
 		return error;
 
-	return xrep_roll_ag_trans(sc);
+	if (xreap_dirty(&rs))
+		return xrep_defer_finish(sc);
+
+	return 0;
 }