diff options
author | Darrick J. Wong <darrick.wong@oracle.com> | 2020-02-19 17:01:41 -0800 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2020-06-24 18:12:18 -0700 |
commit | c43342596290ac9bcff9de6f0a3b63cfa42ddcd8 (patch) | |
tree | 48654b9fdc414da0f48ecd8b2dc2aa21187fdefb | |
parent | c5a1d4626def6f24674263f2dcc0570af84e08e8 (diff) |
xfs: log EFIs for all btree blocks being used to stage a btree [repair-prep-for-bulk-loading_2020-06-24]
We need to log extent free intent items (EFIs) for every extent that we
allocate for the purpose of staging a new btree, so that if we crash before
the new btree is committed, the blocks will be freed during log recovery.
Add a function to relog the EFIs, so that repair can relog them all every
time it creates a new btree block, which will help us to avoid pinning the
log tail.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- | fs/xfs/scrub/repair.c | 90 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_extfree_item.c | 2 |
3 files changed, 89 insertions, 6 deletions
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 669c33460d31..a24f086da848 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -26,6 +26,8 @@ #include "xfs_ag_resv.h" #include "xfs_quota.h" #include "xfs_bmap.h" +#include "xfs_defer.h" +#include "xfs_extfree_item.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -421,12 +423,39 @@ xrep_newbt_init_bare( XFS_AG_RESV_NONE); } +/* + * Set up automatic reaping of the blocks reserved for btree reconstruction in + * case we crash by logging a deferred free item for each extent we allocate so + * that we can get all of the space back if we crash before we can commit the + * new btree. This function returns a token that can be used to cancel + * automatic reaping if repair is successful. + */ +static void +xrep_newbt_schedule_reap( + struct xrep_newbt *xnr, + struct xrep_newbt_resv *resv) +{ + struct xfs_extent_free_item efi_item = { + .xefi_startblock = resv->fsbno, + .xefi_blockcount = resv->len, + .xefi_oinfo = xnr->oinfo, /* struct copy */ + .xefi_skip_discard = true, + }; + LIST_HEAD(items); + + INIT_LIST_HEAD(&efi_item.xefi_list); + list_add(&efi_item.xefi_list, &items); + resv->efi = xfs_extent_free_defer_type.create_intent(xnr->sc->tp, + &items, 1, false); +} + /* Designate specific blocks to be used to build our new btree. */ -int -xrep_newbt_add_blocks( +static int +__xrep_newbt_add_blocks( struct xrep_newbt *xnr, xfs_fsblock_t fsbno, - xfs_extlen_t len) + xfs_extlen_t len, + bool auto_reap) { struct xrep_newbt_resv *resv; @@ -438,10 +467,25 @@ xrep_newbt_add_blocks( resv->fsbno = fsbno; resv->len = len; resv->used = 0; + if (auto_reap) + xrep_newbt_schedule_reap(xnr, resv); list_add_tail(&resv->list, &xnr->resv_list); return 0; } +/* + * Allow certain callers to add disk space directly to the reservation. + * Callers are responsible for cleaning up the reservations. 
+ */ +int +xrep_newbt_add_blocks( + struct xrep_newbt *xnr, + xfs_fsblock_t fsbno, + xfs_extlen_t len) +{ + return __xrep_newbt_add_blocks(xnr, fsbno, len, false); +} + /* Allocate disk space for our new btree. */ int xrep_newbt_alloc_blocks( @@ -483,7 +527,8 @@ xrep_newbt_alloc_blocks( XFS_FSB_TO_AGBNO(sc->mp, args.fsbno), args.len, xnr->oinfo.oi_owner); - error = xrep_newbt_add_blocks(xnr, args.fsbno, args.len); + error = __xrep_newbt_add_blocks(xnr, args.fsbno, args.len, + true); if (error) return error; @@ -499,6 +544,30 @@ xrep_newbt_alloc_blocks( } /* + * Relog the EFIs attached to a staging btree so that we don't pin the log. + * We really only need to do this if the log is getting full due to a lot of + * concurrent activity; otherwise, it just wastes CPU time. XXX + */ +int +xrep_newbt_relog_efis( + struct xrep_newbt *xnr) +{ + struct xrep_newbt_resv *resv; + bool did_work = false; + + list_for_each_entry(resv, &xnr->resv_list, list) { + if (!resv->efi) + continue; + resv->efi = xfs_trans_item_relog(resv->efi, xnr->sc->tp); + did_work = true; + } + + if (!did_work) + return 0; + return xrep_roll_trans(xnr->sc); +} + +/* * Release blocks that were reserved for a btree repair. If the repair * succeeded then we log deferred frees for unused blocks. Otherwise, we try * to free the extents immediately to roll the filesystem back to where it was @@ -511,6 +580,18 @@ xrep_newbt_destroy_reservation( bool cancel_repair) { struct xfs_scrub *sc = xnr->sc; + struct xfs_log_item *lip; + + /* + * Earlier, we logged EFIs for the extents that we allocated to hold + * the new btree so that we could automatically roll back those + * allocations if the system crashed. Now we log an EFD to cancel the + * EFI, either because the repair succeeded and the new blocks are in + * use; or because the repair was cancelled and we're about to free + * the extents directly. 
+ */ + lip = xfs_extent_free_defer_type.create_done(sc->tp, resv->efi, 0); + set_bit(XFS_LI_DIRTY, &lip->li_flags); if (cancel_repair) { int error; @@ -579,6 +660,7 @@ junkit: * reservations. */ list_for_each_entry_safe(resv, n, &xnr->resv_list, list) { + xfs_extent_free_defer_type.abort_intent(resv->efi); list_del(&resv->list); kmem_free(resv); } diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 611e1669222d..e7079f708252 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -68,6 +68,8 @@ struct xrep_newbt_resv { /* Link to list of extents that we've reserved. */ struct list_head list; + struct xfs_log_item *efi; + /* FSB of the block we reserved. */ xfs_fsblock_t fsbno; @@ -114,6 +116,7 @@ int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr, union xfs_btree_ptr *ptr); void xrep_bload_estimate_slack(struct xfs_scrub *sc, struct xfs_btree_bload *bload); +int xrep_newbt_relog_efis(struct xrep_newbt *xnr); #else diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 5f239af43046..523e40eae2d9 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -325,8 +325,6 @@ xfs_trans_get_efd( { struct xfs_efd_log_item *efdp; - ASSERT(nextents > 0); - if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + (nextents - 1) * sizeof(struct xfs_extent), |