author    | Darrick J. Wong <darrick.wong@oracle.com> | 2019-08-09 09:30:12 -0700
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2019-08-10 09:58:11 -0700
commit    | 772d9d9f3e057d022ca6d235b2ea5b0be599d582 (patch)
tree      | 6a8eafb1dcf3ae89287d792f0dd4eefef1138745
parent    | 00b8d248886e778edb502c4794faf00c0424cd84 (diff)
convert freesp to btree bulk load (repair-redesign_2019-08-10)
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc_btree.c |  89
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc_btree.h |   7
-rw-r--r-- | fs/xfs/scrub/alloc_repair.c     | 599
-rw-r--r-- | fs/xfs/scrub/repair.c           |  15
-rw-r--r-- | fs/xfs/scrub/repair.h           |   4
5 files changed, 459 insertions, 255 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 2a94543857a1..c71318216931 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -24,6 +24,10 @@ STATIC struct xfs_btree_cur *
 xfs_allocbt_dup_cursor(
 	struct xfs_btree_cur	*cur)
 {
+	if (cur->bc_flags & XFS_BTREE_STAGING)
+		return xfs_allocbt_stage_cursor(cur->bc_mp, cur->bc_tp,
+				cur->bc_private.a.afake,
+				cur->bc_private.a.agno, cur->bc_btnum);
 	return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
 			cur->bc_private.a.agbp, cur->bc_private.a.agno,
 			cur->bc_btnum);
@@ -474,15 +478,13 @@ static const struct xfs_btree_ops xfs_cntbt_ops = {
 /*
  * Allocate a new allocation btree cursor.
  */
-struct xfs_btree_cur *			/* new alloc btree cursor */
-xfs_allocbt_init_cursor(
+STATIC struct xfs_btree_cur *		/* new alloc btree cursor */
+xfs_allocbt_init_common(
 	struct xfs_mount	*mp,		/* file system mount point */
 	struct xfs_trans	*tp,		/* transaction pointer */
-	struct xfs_buf		*agbp,		/* buffer for agf structure */
 	xfs_agnumber_t		agno,		/* allocation group number */
 	xfs_btnum_t		btnum)		/* btree identifier */
 {
-	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
 	struct xfs_btree_cur	*cur;
 
 	ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
@@ -493,28 +495,95 @@
 	cur->bc_mp = mp;
 	cur->bc_btnum = btnum;
 	cur->bc_blocklog = mp->m_sb.sb_blocklog;
+	cur->bc_private.a.agno = agno;
 
-	if (btnum == XFS_BTNUM_CNT) {
+	if (btnum == XFS_BTNUM_CNT)
 		cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
+	else
+		cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
+
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+
+	return cur;
+}
+
+/*
+ * Allocate a new allocation btree cursor.
+ */
+struct xfs_btree_cur *			/* new alloc btree cursor */
+xfs_allocbt_init_cursor(
+	struct xfs_mount	*mp,		/* file system mount point */
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_buf		*agbp,		/* buffer for agf structure */
+	xfs_agnumber_t		agno,		/* allocation group number */
+	xfs_btnum_t		btnum)		/* btree identifier */
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
+	struct xfs_btree_cur	*cur;
+
+	cur = xfs_allocbt_init_common(mp, tp, agno, btnum);
+	if (btnum == XFS_BTNUM_CNT) {
 		cur->bc_ops = &xfs_cntbt_ops;
 		cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
-		cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
+		cur->bc_flags |= XFS_BTREE_LASTREC_UPDATE;
 	} else {
-		cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
 		cur->bc_ops = &xfs_bnobt_ops;
 		cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
 	}
 
 	cur->bc_private.a.agbp = agbp;
-	cur->bc_private.a.agno = agno;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb))
-		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+	return cur;
+}
 
+/* Create a free space btree cursor with a fake root for staging. */
+struct xfs_btree_cur *
+xfs_allocbt_stage_cursor(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xbtree_afakeroot	*afake,
+	xfs_agnumber_t		agno,
+	xfs_btnum_t		btnum)
+{
+	struct xfs_btree_cur	*cur;
+	struct xfs_btree_ops	*ops;
+
+	cur = xfs_allocbt_init_common(mp, tp, agno, btnum);
+	if (btnum == XFS_BTNUM_BNO)
+		xfs_btree_stage_afakeroot(cur, afake, &xfs_bnobt_ops, &ops);
+	else
+		xfs_btree_stage_afakeroot(cur, afake, &xfs_cntbt_ops, &ops);
+	ops->set_root = xbtree_afakeroot_set_root;
+	ops->init_ptr_from_cur = xbtree_afakeroot_init_ptr_from_cur;
 	return cur;
 }
 
 /*
+ * Install a new free space btree root.  Caller is responsible for
+ * invalidating and freeing the old btree blocks.
+ */
+void
+xfs_allocbt_commit_staged_btree(
+	struct xfs_btree_cur	*cur,
+	struct xfs_buf		*agbp)
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
+	struct xbtree_afakeroot	*afake = cur->bc_private.a.afake;
+
+	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
+
+	agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
+	agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
+	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+
+	if (cur->bc_btnum == XFS_BTNUM_BNO)
+		xfs_btree_commit_afakeroot(cur, agbp, &xfs_bnobt_ops);
+	else
+		xfs_btree_commit_afakeroot(cur, agbp, &xfs_cntbt_ops);
+}
+
+/*
  * Calculate number of records in an alloc btree block.
  */
 int
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index c9305ebb69f6..dde324609a89 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
@@ -13,6 +13,7 @@
 struct xfs_buf;
 struct xfs_btree_cur;
 struct xfs_mount;
+struct xbtree_afakeroot;
 
 /*
  * Btree block header size depends on a superblock flag.
@@ -48,8 +49,14 @@ struct xfs_mount;
 extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
 		struct xfs_trans *, struct xfs_buf *,
 		xfs_agnumber_t, xfs_btnum_t);
+struct xfs_btree_cur *xfs_allocbt_stage_cursor(struct xfs_mount *mp,
+		struct xfs_trans *tp, struct xbtree_afakeroot *afake,
+		xfs_agnumber_t agno, xfs_btnum_t btnum);
 extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
 extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp,
 		unsigned long long len);
 
+void xfs_allocbt_commit_staged_btree(struct xfs_btree_cur *cur,
+		struct xfs_buf *agbp);
+
 #endif	/* __XFS_ALLOC_BTREE_H__ */
diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c
index f21506dbffaa..2bf632eb1df7 100644
--- a/fs/xfs/scrub/alloc_repair.c
+++ b/fs/xfs/scrub/alloc_repair.c
@@ -23,6 +23,7 @@
 #include "xfs_refcount.h"
 #include "xfs_extent_busy.h"
 #include "xfs_health.h"
+#include "xfs_bmap.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
@@ -72,11 +73,26 @@ struct xrep_abt {
 	/* All OWN_AG blocks. */
 	struct xfs_bitmap	old_allocbt_blocks;
 
+	/*
+	 * New bnobt information.  All btree block reservations are added to
+	 * the reservation list in new_bnobt_info.
+	 */
+	struct xrep_newbt	new_bnobt_info;
+
+	/* new cntbt information */
+	struct xrep_newbt	new_cntbt_info;
+
 	/* Free space extents. */
 	struct xfbma		*free_records;
 
 	struct xfs_scrub	*sc;
 
+	/* Number of non-null records in @free_records. */
+	uint64_t		nr_real_records;
+
+	/* get_data()'s position in the free space record array. */
+	uint64_t		iter;
+
 	/*
 	 * Next block we anticipate seeing in the rmap records.  If the next
 	 * rmap record is greater than next_bno, we have found unused space.
@@ -85,6 +101,9 @@ struct xrep_abt {
 
 	/* Number of free blocks in this AG. */
 	xfs_agblock_t		nr_blocks;
+
+	/* Longest free extent we found in the AG. */
+	xfs_agblock_t		longest;
 };
 
 /* Record extents that aren't in use from gaps in the rmap records. */
@@ -151,9 +170,12 @@ xrep_abt_walk_agfl(
 	return xfs_bitmap_set(&ra->not_allocbt_blocks, fsb, 1);
 }
 
-/* Compare two free space extents. */
+/*
+ * Compare two free space extents by block number.  We want to sort in order
+ * of increasing block number.
+ */
 static int
-xrep_abt_extent_cmp(
+xrep_bnobt_extent_cmp(
 	const void		*a,
 	const void		*b)
 {
@@ -168,93 +190,30 @@
 }
 
 /*
- * Add a free space record back into the bnobt/cntbt.  It is assumed that the
- * space is already accounted for in fdblocks, so we use a special per-AG
- * reservation code to skip the fdblocks update.
+ * Compare two free space extents by length and then block number.  We want
+ * to sort in order of increasing length and then in increasing block
+ * number.
  */
-STATIC int
-xrep_abt_free_extent(
-	const void		*item,
-	void			*priv)
-{
-	struct xrep_abt		*ra = priv;
-	struct xfs_scrub	*sc = ra->sc;
-	const struct xrep_abt_extent	*rae = item;
-	xfs_fsblock_t		fsbno;
-	int			error;
-
-	fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, rae->bno);
-
-	error = xfs_free_extent(sc->tp, fsbno, rae->len,
-			&XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_IGNORE);
-	if (error)
-		return error;
-	return xrep_roll_ag_trans(sc);
-}
-
-/* Find the longest free extent in the list. */
 static int
-xrep_abt_get_longest(
-	struct xrep_abt		*ra,
-	struct xrep_abt_extent	*longest)
-{
-	struct xrep_abt_extent	rae;
-	uint64_t		victim = -1ULL;
-	uint64_t		i;
-
-	longest->len = 0;
-	foreach_xfbma_item(ra->free_records, i, rae) {
-		if (rae.len > longest->len) {
-			memcpy(longest, &rae, sizeof(*longest));
-			victim = i;
-		}
-	}
-
-	if (longest->len == 0)
-		return 0;
-	return xfbma_nullify(ra->free_records, victim);
-}
-
-/*
- * Allocate a block from the (cached) first extent in the AG.  In theory
- * this should never fail, since we already checked that there was enough
- * space to handle the new btrees.
- */
-STATIC xfs_agblock_t
-xrep_abt_alloc_block(
-	struct xrep_abt		*ra)
+xrep_cntbt_extent_cmp(
+	const void		*a,
+	const void		*b)
 {
-	struct xrep_abt_extent	ext = { 0 };
-	uint64_t		i;
-	xfs_agblock_t		agbno;
-	int			error;
+	const struct xrep_abt_extent	*ap = a;
+	const struct xrep_abt_extent	*bp = b;
 
-	/* Pull the first free space extent off the list, and... */
-	foreach_xfbma_item(ra->free_records, i, ext) {
-		break;
-	}
-	if (ext.len == 0)
-		return NULLAGBLOCK;
-
-	/* ...take its first block. */
-	agbno = ext.bno;
-	ext.bno++;
-	ext.len--;
-	if (ext.len)
-		error = xfbma_set(ra->free_records, i, &ext);
-	else
-		error = xfbma_nullify(ra->free_records, i);
-	if (error)
-		return NULLAGBLOCK;
-	return agbno;
+	if (ap->len > bp->len)
+		return 1;
+	else if (ap->len < bp->len)
+		return -1;
+	return xrep_bnobt_extent_cmp(a, b);
 }
 
 /*
- * Iterate all reverse mappings to find (1) the free extents, (2) the OWN_AG
- * extents, (3) the rmapbt blocks, and (4) the AGFL blocks.  The free space
- * is (1) + (2) - (3) - (4).  Figure out if we have enough free space to
- * reconstruct the free space btrees.  Caller must clean up the input lists
- * if something goes wrong.
+ * Iterate all reverse mappings to find (1) the gaps between rmap records
+ * (all unowned space), (2) the OWN_AG extents (which encompass the free
+ * space btrees, the rmapbt, and the agfl), (3) the rmapbt blocks, and (4)
+ * the AGFL blocks.  The free space is (1) + (2) - (3) - (4).
  */
 STATIC int
 xrep_abt_find_freespace(
@@ -264,7 +223,6 @@
 	struct xfs_btree_cur	*cur;
 	struct xfs_mount	*mp = sc->mp;
 	xfs_agblock_t		agend;
-	xfs_agblock_t		nr_blocks;
 	int			error;
 
 	xfs_bitmap_init(&ra->not_allocbt_blocks);
@@ -274,7 +232,7 @@
	 * mappings, all the OWN_AG blocks, and all the rmapbt extents.
	 */
 	cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno);
-	error = xfs_rmap_query_all(cur, xrep_abt_walk_rmap, &ra);
+	error = xfs_rmap_query_all(cur, xrep_abt_walk_rmap, ra);
 	xfs_btree_del_cursor(cur, error);
 	if (error)
 		goto err;
@@ -294,200 +252,376 @@
 
 	/* Collect all the AGFL blocks. */
 	error = xfs_agfl_walk(mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
-			sc->sa.agfl_bp, xrep_abt_walk_agfl, &ra);
+			sc->sa.agfl_bp, xrep_abt_walk_agfl, ra);
 	if (error)
 		goto err;
 
-	/*
-	 * Do we have enough space to rebuild both freespace btrees?  We
-	 * won't touch the AG if we've exceeded the per-AG reservation or if
-	 * we don't have enough free space to store the free space
-	 * information.
-	 */
-	nr_blocks = 2 * xfs_allocbt_calc_size(mp,
-			xfbma_length(ra->free_records));
-	if (!xrep_ag_has_space(sc->sa.pag, 0, XFS_AG_RESV_NONE) ||
-	    ra->nr_blocks < nr_blocks) {
-		error = -ENOSPC;
-		goto err;
-	}
-
 	/* Compute the old bnobt/cntbt blocks. */
 	error = xfs_bitmap_disunion(&ra->old_allocbt_blocks,
 			&ra->not_allocbt_blocks);
+	if (error)
+		goto err;
+
+	ra->nr_real_records = xfbma_length(ra->free_records);
 err:
 	xfs_bitmap_destroy(&ra->not_allocbt_blocks);
 	return error;
 }
 
 /*
- * Reset the global free block counter and the per-AG counters to make it
- * look like this AG has no free space.
+ * We're going to use the observed free space records to reserve blocks for
+ * the new free space btrees, so we play an iterative game where we try to
+ * converge on the number of blocks we need:
+ *
+ * 1. Estimate how many blocks we'll need to store the records.
+ * 2. If the last free record has more blocks than we need, we're done.
+ *    We will have to re-sort the records prior to building the cntbt.
+ * 3. If that record has exactly the number of blocks we need, null out the
+ *    record.  We're done.
+ * 4. Otherwise, we still need more blocks.  Null out the record, subtract
+ *    its length from the number of blocks we need, and go back to step 1.
+ *
+ * Fortunately, we don't have to do any transaction work to play this game,
+ * so we don't have to tear down the staging cursors.
  */
 STATIC int
-xrep_abt_reset_counters(
-	struct xfs_scrub	*sc,
-	int			*log_flags)
+xrep_abt_reserve_space(
+	struct xrep_abt		*ra,
+	struct xfs_btree_cur	*bno_cur,
+	struct xfs_btree_bload	*bno_bload,
+	struct xfs_btree_cur	*cnt_cur,
+	struct xfs_btree_bload	*cnt_bload,
+	bool			*need_resort)
 {
-	struct xfs_perag	*pag = sc->sa.pag;
-	struct xfs_agf		*agf;
-	xfs_agblock_t		new_btblks;
-	xfs_agblock_t		to_free;
+	struct xfs_scrub	*sc = ra->sc;
+	uint64_t		record_nr = xfbma_length(ra->free_records) - 1;
+	unsigned int		allocated = 0;
+	int			error = 0;
 
-	/*
-	 * Since we're abandoning the old bnobt/cntbt, we have to decrease
-	 * fdblocks by the # of blocks in those trees.  btreeblks counts the
-	 * non-root blocks of the free space and rmap btrees.  Do this before
-	 * resetting the AGF counters.
-	 */
-	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	*need_resort = false;
+	do {
+		struct xrep_abt_extent	rae;
+		uint64_t		required;
+		unsigned int		desired;
+		unsigned int		found;
 
-	/* rmap_blocks accounts root block, btreeblks doesn't */
-	new_btblks = be32_to_cpu(agf->agf_rmap_blocks) - 1;
+		/* Compute how many blocks we'll need. */
+		error = xfs_btree_bload_init(cnt_cur, cnt_bload,
+				ra->nr_real_records, 0, 0);
+		if (error)
+			break;
 
-	/* btreeblks doesn't account bno/cnt root blocks */
-	to_free = pag->pagf_btreeblks + 2;
+		error = xfs_btree_bload_init(bno_cur, bno_bload,
+				ra->nr_real_records, 0, 0);
+		if (error)
+			break;
 
-	/* and don't account for the blocks we aren't freeing */
-	to_free -= new_btblks;
+		/* How many btree blocks do we need to store all records? */
+		required = cnt_bload->nr_blocks + bno_bload->nr_blocks;
+		ASSERT(required < INT_MAX);
 
-	/*
-	 * Reset the per-AG info, both incore and ondisk.  Mark the incore
-	 * state stale in case we fail out of here.
-	 */
-	ASSERT(pag->pagf_init);
-	pag->pagf_init = 0;
-	pag->pagf_btreeblks = new_btblks;
-	pag->pagf_freeblks = 0;
-	pag->pagf_longest = 0;
+		/* If we've reserved enough blocks, we're done. */
+		if (allocated >= required)
+			break;
 
-	agf->agf_btreeblks = cpu_to_be32(new_btblks);
-	agf->agf_freeblks = 0;
-	agf->agf_longest = 0;
-	*log_flags |= XFS_AGF_BTREEBLKS | XFS_AGF_LONGEST | XFS_AGF_FREEBLKS;
+		desired = required - allocated;
 
-	return 0;
+		/* We need space but there's none left; bye! */
+		if (ra->nr_real_records == 0) {
+			error = -ENOSPC;
+			break;
+		}
+
+		/* Grab the last record (the longest extent) from the list. */
+		error = xfbma_get(ra->free_records, record_nr, &rae);
+		if (error)
+			break;
+
+		ASSERT(rae.len <= UINT_MAX);
+		found = min_t(unsigned int, rae.len, desired);
+
+		error = xrep_newbt_add_reservation(&ra->new_bnobt_info,
+				XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, rae.bno),
+				found);
+		if (error)
+			break;
+		allocated += found;
+		ra->nr_blocks -= found;
+
+		if (rae.len > desired) {
+			/*
+			 * Record has more space than we need.  The number of
+			 * free records doesn't change, so shrink the free
+			 * record and exit the loop.
+			 */
+			rae.bno += desired;
+			rae.len -= desired;
+			error = xfbma_set(ra->free_records, record_nr, &rae);
+			if (error)
+				break;
+			*need_resort = true;
+			break;
+		} else {
+			/*
+			 * We're going to use up the entire record, so
+			 * nullify it and move on to the next one.  This
+			 * changes the number of free records, so we must go
+			 * around the loop once more to re-run _bload_init.
+			 */
+			error = xfbma_nullify(ra->free_records, record_nr);
+			if (error)
+				break;
+			ra->nr_real_records--;
+			record_nr--;
+		}
+	} while (1);
+
+	return error;
 }
 
-/* Initialize a new free space btree root and implant into AGF. */
+/*
+ * Deal with all the space we reserved.  Blocks that were allocated for the
+ * free space btrees need to have a (deferred) rmap added for the OWN_AG
+ * allocation, and blocks that didn't get used can be freed via the usual
+ * (deferred) means.
+ */
 STATIC int
-xrep_abt_reset_btree(
-	struct xrep_abt		*ra,
-	xfs_btnum_t		btnum)
+xrep_abt_dispose_reservations(
+	struct xrep_abt		*ra)
 {
+	struct xrep_newbt_resv	*resv, *n;
 	struct xfs_scrub	*sc = ra->sc;
-	struct xfs_buf		*bp;
-	struct xfs_perag	*pag = sc->sa.pag;
-	struct xfs_mount	*mp = sc->mp;
-	struct xfs_agf		*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
-	const struct xfs_buf_ops *ops;
-	xfs_agblock_t		agbno;
-	int			error;
+	int			error = 0;
+
+	for_each_xrep_newbt_reservation(&ra->new_bnobt_info, resv, n) {
+		/* Add a deferred rmap for each extent we used. */
+		if (resv->used > 0) {
+			error = xfs_rmap_alloc_extent(sc->tp, resv->fsbno,
+					resv->used, XFS_RMAP_OWN_AG, false);
+			if (error)
+				break;
+		}
 
-	/* Allocate new root block. */
-	agbno = xrep_abt_alloc_block(ra);
-	if (agbno == NULLAGBLOCK)
-		return -ENOSPC;
-
-	switch (btnum) {
-	case XFS_BTNUM_BNOi:
-		ops = &xfs_bnobt_buf_ops;
-		break;
-	case XFS_BTNUM_CNTi:
-		ops = &xfs_cntbt_buf_ops;
-		break;
-	default:
-		ASSERT(0);
-		return -EFSCORRUPTED;
+		/*
+		 * Add a deferred free for each block we didn't use and now
+		 * have to add to the free space since the new btrees are
+		 * online.
+		 */
+		if (resv->used < resv->len)
+			__xfs_bmap_add_free(sc->tp, resv->fsbno + resv->used,
+					resv->len - resv->used, NULL, true);
 	}
 
-	/* Initialize new tree root. */
-	error = xrep_init_btblock(sc, XFS_AGB_TO_FSB(mp, sc->sa.agno, agbno),
-			&bp, btnum, ops);
-	if (error)
-		return error;
+	for_each_xrep_newbt_reservation(&ra->new_bnobt_info, resv, n) {
+		list_del(&resv->list);
+		kmem_free(resv);
+	}
 
-	/* Implant into AGF. */
-	agf->agf_roots[btnum] = cpu_to_be32(agbno);
-	agf->agf_levels[btnum] = cpu_to_be32(1);
+	return error;
+}
 
-	/* Add rmap records for the btree roots */
-	error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno, agbno, 1,
-			&XFS_RMAP_OINFO_AG);
-	if (error)
-		return error;
+/* Retrieve free space data for bulk load. */
+STATIC int
+xrep_abt_get_data(
+	struct xfs_btree_cur	*cur,
+	void			*priv)
+{
+	struct xfs_alloc_rec_incore	*arec = &cur->bc_rec.a;
+	struct xrep_abt		*ra = priv;
+	int			error;
 
-	/* Reset the incore state. */
-	pag->pagf_levels[btnum] = 1;
+	do {
+		error = xfbma_get(ra->free_records, ra->iter++, arec);
+	} while (error == 0 && xfbma_is_null(ra->free_records, arec));
 
-	return 0;
+	ra->longest = max(ra->longest, arec->ar_blockcount);
+	return error;
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+STATIC int
+xrep_abt_bload_alloc(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	void			*priv)
+{
+	struct xrep_abt		*ra = priv;
+
+	return xrep_newbt_alloc_block(cur, &ra->new_bnobt_info, ptr);
 }
 
-/* Initialize new bnobt/cntbt roots and implant them into the AGF. */
+/*
+ * Reset the AGF counters to reflect the free space btrees that we just
+ * rebuilt, then reinitialize the per-AG data.
+ */
 STATIC int
-xrep_abt_reset_btrees(
+xrep_abt_reset_counters(
 	struct xrep_abt		*ra,
-	int			*log_flags)
+	unsigned int		freesp_btreeblks)
 {
-	int			error;
+	struct xfs_scrub	*sc = ra->sc;
+	struct xfs_perag	*pag = sc->sa.pag;
+	struct xfs_agf		*agf;
+	struct xfs_buf		*bp;
 
-	error = xrep_abt_reset_btree(ra, XFS_BTNUM_BNOi);
-	if (error)
-		return error;
-	error = xrep_abt_reset_btree(ra, XFS_BTNUM_CNTi);
-	if (error)
-		return error;
+	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
 
-	*log_flags |= XFS_AGF_ROOTS | XFS_AGF_LEVELS;
-	return 0;
+	/*
+	 * Mark the pagf information stale and use the accessor function to
+	 * forcibly reload it from the values we just logged.  We still own
+	 * the AGF buffer so we can safely ignore bp.
+	 */
+	ASSERT(pag->pagf_init);
+	pag->pagf_init = 0;
+
+	agf->agf_btreeblks = cpu_to_be32(freesp_btreeblks +
+				(be32_to_cpu(agf->agf_rmap_blocks) - 1));
+	agf->agf_freeblks = cpu_to_be32(ra->nr_blocks);
+	agf->agf_longest = cpu_to_be32(ra->longest);
+	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS |
+						 XFS_AGF_LONGEST |
+						 XFS_AGF_FREEBLKS);
+
+	return xfs_alloc_read_agf(sc->mp, sc->tp, sc->sa.agno, 0, &bp);
 }
 
 /*
- * Make our new freespace btree roots permanent so that we can start freeing
- * unused space back into the AG.
+ * Use the collected free space information to stage new free space btrees.
+ * If this is successful, we'll return with the new btree root information
+ * logged to the repair transaction but not yet committed.
+ */
 STATIC int
-xrep_abt_commit_new(
-	struct xrep_abt		*ra,
-	int			log_flags)
+xrep_abt_build_new_trees(
+	struct xrep_abt		*ra)
 {
+	struct xfs_btree_bload	bno_bload;
+	struct xfs_btree_bload	cnt_bload;
 	struct xfs_scrub	*sc = ra->sc;
+	struct xfs_btree_cur	*bno_cur;
+	struct xfs_btree_cur	*cnt_cur;
+	bool			need_resort;
 	int			error;
 
-	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, log_flags);
-
-	/* Invalidate the old freespace btree blocks and commit. */
-	error = xrep_invalidate_blocks(sc, &ra->old_allocbt_blocks);
+	/*
+	 * Sort the free extents by length so that we can set up the free
+	 * space btrees in as few extents as possible.  This reduces the
+	 * amount of deferred rmap / free work we have to do at the end.
+	 */
+	error = xfbma_sort(ra->free_records, xrep_cntbt_extent_cmp);
 	if (error)
 		return error;
-	error = xrep_roll_ag_trans(sc);
+
+	/*
+	 * Prepare to construct the new btree by reserving disk space for
+	 * the new btree and setting up all the accounting information we'll
+	 * need to root the new btree while it's under construction and
+	 * before we attach it to the AG header.
+	 */
+	xrep_newbt_init_bare(&ra->new_bnobt_info, sc);
+	xrep_newbt_init_bare(&ra->new_cntbt_info, sc);
+
+	/* Allocate cursors for the staged btrees. */
+	bno_cur = xfs_allocbt_stage_cursor(sc->mp, sc->tp,
+			&ra->new_bnobt_info.afake, sc->sa.agno, XFS_BTNUM_BNO);
+	cnt_cur = xfs_allocbt_stage_cursor(sc->mp, sc->tp,
+			&ra->new_cntbt_info.afake, sc->sa.agno, XFS_BTNUM_CNT);
+
+	/* Reserve the space we'll need for the new btrees. */
+	error = xrep_abt_reserve_space(ra, bno_cur, &bno_bload, cnt_cur,
+			&cnt_bload, &need_resort);
+	if (error)
+		goto out_cur;
+
+	/*
+	 * If we need to re-sort the free extents by length, do so, so that
+	 * we can put the records into the cntbt in the correct order.
+	 */
+	if (need_resort) {
+		error = xfbma_sort(ra->free_records, xrep_cntbt_extent_cmp);
+		if (error)
+			goto out_cur;
+	}
+
+	/* Load the free space by length tree. */
+	ra->iter = 0;
+	ra->longest = 0;
+	error = xfs_btree_bload(cnt_cur, &cnt_bload, xrep_abt_get_data,
+			xrep_abt_bload_alloc, ra);
+	if (error)
+		goto out_cur;
+
+	/*
+	 * Re-sort the free extents by block number so that we can put the
+	 * records into the bnobt in the correct order.
+	 */
+	error = xfbma_sort(ra->free_records, xrep_bnobt_extent_cmp);
+	if (error)
+		goto out_cur;
+
+	/* Load the free space by block number tree. */
+	ra->iter = 0;
+	error = xfs_btree_bload(bno_cur, &bno_bload, xrep_abt_get_data,
+			xrep_abt_bload_alloc, ra);
+	if (error)
+		goto out_cur;
+
+	/*
+	 * Install the new btrees in the AG header.  After this point the
+	 * old btrees are no longer accessible and the new trees are live.
+	 *
+	 * Note: We re-read the AGF here to ensure the buffer type is set
+	 * properly.  Since we built a new tree without attaching to the AGF
+	 * buffer, the buffer item may have fallen off the buffer.  This
+	 * ought to succeed since the AGF is held across transaction rolls.
+	 */
+	error = xfs_read_agf(sc->mp, sc->tp, sc->sa.agno, 0, &sc->sa.agf_bp);
+	if (error)
+		goto out_cur;
+
+	/* Commit our new btrees. */
+	xfs_allocbt_commit_staged_btree(bno_cur, sc->sa.agf_bp);
+	xfs_btree_del_cursor(bno_cur, 0);
+	xfs_allocbt_commit_staged_btree(cnt_cur, sc->sa.agf_bp);
+	xfs_btree_del_cursor(cnt_cur, 0);
+
+	/* Reset the AGF counters now that we've changed the btree shape. */
+	error = xrep_abt_reset_counters(ra, (bno_bload.nr_blocks - 1) +
+					    (cnt_bload.nr_blocks - 1));
+	if (error)
+		goto out_newbt;
+
+	/* Dispose of any unused blocks and the accounting information. */
+	error = xrep_abt_dispose_reservations(ra);
 	if (error)
 		return error;
 
-	/* Now that we've succeeded, mark the incore state valid again. */
-	sc->sa.pag->pagf_init = 1;
-	return 0;
+	return xrep_roll_ag_trans(sc);
+
+out_cur:
+	xfs_btree_del_cursor(cnt_cur, error);
+	xfs_btree_del_cursor(bno_cur, error);
+out_newbt:
+	xrep_abt_dispose_reservations(ra);
+	return error;
 }
 
-/* Build new free space btrees and dispose of the old one. */
+/*
+ * Now that we've logged the roots of the new btrees, invalidate all of the
+ * old blocks and free them.
+ */
 STATIC int
-xrep_abt_rebuild_trees(
+xrep_abt_remove_old_trees(
 	struct xrep_abt		*ra)
 {
-	struct xrep_abt_extent	rae;
 	struct xfs_scrub	*sc = ra->sc;
 	int			error;
 
-	/*
-	 * Insert the longest free extent in case it's necessary to
-	 * refresh the AGFL with multiple blocks.  If there is no longest
-	 * extent, we had exactly the free space we needed; we're done.
-	 */
-	error = xrep_abt_get_longest(ra, &rae);
-	if (!error && rae.len > 0) {
-		error = xrep_abt_free_extent(&rae, ra);
-		if (error)
-			return error;
-	}
+	/* Invalidate the old freespace btree blocks and commit. */
+	error = xrep_invalidate_blocks(sc, &ra->old_allocbt_blocks);
+	if (error)
+		return error;
+	error = xrep_roll_ag_trans(sc);
+	if (error)
+		return error;
 
 	/* Free all the OWN_AG blocks that are not in the rmapbt/agfl. */
 	error = xrep_reap_extents(sc, &ra->old_allocbt_blocks,
@@ -495,8 +629,8 @@
 	if (error)
 		return error;
 
-	/* Insert records into the new btrees. */
-	return xfbma_iter_del(ra->free_records, xrep_abt_free_extent, ra);
+	sc->flags |= XREP_RESET_PERAG_RESV;
+	return 0;
 }
 
 /* Repair the freespace btrees for some AG. */
@@ -506,7 +640,6 @@
 {
 	struct xrep_abt		*ra;
 	struct xfs_mount	*mp = sc->mp;
-	int			log_flags = 0;
 	int			error;
 
 	/* We require the rmapbt to rebuild anything. */
@@ -543,36 +676,14 @@
 	if (error)
 		goto out_bitmap;
 
-	/* Make sure we got some free space. */
-	if (xfbma_length(ra->free_records) == 0) {
-		error = -ENOSPC;
-		goto out_bitmap;
-	}
-
-	/*
-	 * Sort the free extents by block number to avoid bnobt splits when
-	 * we rebuild the free space btrees.
-	 */
-	error = xfbma_sort(ra->free_records, xrep_abt_extent_cmp);
+	/* Rebuild the free space information. */
+	error = xrep_abt_build_new_trees(ra);
 	if (error)
 		goto out_bitmap;
 
-	/*
-	 * Blow out the old free space btrees.  This is the point at which
-	 * we are no longer able to bail out gracefully.
-	 */
-	error = xrep_abt_reset_counters(sc, &log_flags);
-	if (error)
-		goto out_bitmap;
-	error = xrep_abt_reset_btrees(ra, &log_flags);
-	if (error)
-		goto out_bitmap;
-	error = xrep_abt_commit_new(ra, log_flags);
-	if (error)
-		goto out_bitmap;
+	/* Kill the old trees. */
+	error = xrep_abt_remove_old_trees(ra);
 
-	/* Now rebuild the freespace information. */
-	error = xrep_abt_rebuild_trees(ra);
 out_bitmap:
 	xfs_bitmap_destroy(&ra->old_allocbt_blocks);
 	xfbma_destroy(ra->free_records);
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 3ecef3883b08..e14279deb0e1 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -378,6 +378,19 @@ xrep_newbt_init(
 	INIT_LIST_HEAD(&xnr->reservations);
 }
 
+/*
+ * Initialize accounting resources for staging a new btree.  Callers are
+ * expected to add their own reservations (and clean them up) manually.
+ */
+void
+xrep_newbt_init_bare(
+	struct xrep_newbt	*xnr,
+	struct xfs_scrub	*sc)
+{
+	xrep_newbt_init(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
+			XFS_AG_RESV_NONE);
+}
+
 /* Add a space reservation manually. */
 int
 xrep_newbt_add_reservation(
@@ -510,7 +523,7 @@
 	 */
 	if (xnr->last_resv == NULL) {
 		list_for_each_entry(resv, &xnr->reservations, list) {
-			if (resv->used < xnr->last_resv->len) {
+			if (resv->used < resv->len) {
 				xnr->last_resv = resv;
 				break;
 			}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 547d916ba367..241ddd8fe6dd 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -124,9 +124,13 @@ struct xrep_newbt {
 	enum xfs_ag_resv_type	resv;
 };
 
+#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
+	list_for_each_entry_safe((resv), (n), &(xnr)->reservations, list)
+
 void xrep_newbt_init(struct xrep_newbt *xba, struct xfs_scrub *sc,
 		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
 		enum xfs_ag_resv_type resv);
+void xrep_newbt_init_bare(struct xrep_newbt *xba, struct xfs_scrub *sc);
 int xrep_newbt_add_reservation(struct xrep_newbt *xba, xfs_fsblock_t fsbno,
 		xfs_extlen_t len);
 int xrep_newbt_reserve_space(struct xrep_newbt *xba, uint64_t nr_blocks);
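For orientation, the staged rebuild that this patch wires up follows the
shape below.  This is a condensed sketch assembled from the
xrep_abt_build_new_trees() hunk above, with error handling, the record
sorting, and the cntbt leg elided; it is not part of the patch itself.

	/* Set up bare accounting and a staging cursor with a fake root. */
	xrep_newbt_init_bare(&ra->new_bnobt_info, sc);
	bno_cur = xfs_allocbt_stage_cursor(sc->mp, sc->tp,
			&ra->new_bnobt_info.afake, sc->sa.agno, XFS_BTNUM_BNO);

	/* Compute the new btree geometry, then bulk load the records. */
	error = xfs_btree_bload_init(bno_cur, &bno_bload,
			ra->nr_real_records, 0, 0);
	error = xfs_btree_bload(bno_cur, &bno_bload, xrep_abt_get_data,
			xrep_abt_bload_alloc, ra);

	/* Swap the staged root into the AGF; the new tree goes live here. */
	xfs_allocbt_commit_staged_btree(bno_cur, sc->sa.agf_bp);
	xfs_btree_del_cursor(bno_cur, 0);

Because the bulk loader writes only to newly reserved blocks and the fake
root is swapped into the AGF in a single logged update, the old btree stays
intact and reachable until the commit step, which is what lets the repair
bail out gracefully at any point before then.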