From ebbf385a912f3d1e5c13d7e33101e8aefb300f15 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 1 Sep 2021 10:45:54 -0700 Subject: xfs: repair inode block maps Use the reverse-mapping btree information to rebuild an inode block map. Update the btree bulk loading code as necessary to support inode rooted btrees and fix some bitrot problems. Signed-off-by: Darrick J. Wong --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_bmap_btree.c | 115 ++++++- fs/xfs/libxfs/xfs_bmap_btree.h | 5 + fs/xfs/libxfs/xfs_btree_staging.c | 11 +- fs/xfs/libxfs/xfs_btree_staging.h | 8 +- fs/xfs/libxfs/xfs_iext_tree.c | 23 +- fs/xfs/libxfs/xfs_inode_fork.c | 1 + fs/xfs/libxfs/xfs_inode_fork.h | 3 + fs/xfs/scrub/bmap.c | 22 ++ fs/xfs/scrub/bmap_repair.c | 639 ++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/repair.c | 33 ++ fs/xfs/scrub/repair.h | 6 + fs/xfs/scrub/scrub.c | 4 +- fs/xfs/scrub/trace.h | 34 +- fs/xfs/xfs_trans.c | 88 ++++++ fs/xfs/xfs_trans.h | 4 + 16 files changed, 961 insertions(+), 36 deletions(-) create mode 100644 fs/xfs/scrub/bmap_repair.c (limited to 'fs') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 9ba3ac978ded..c9f62877fc18 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -169,6 +169,7 @@ xfs-y += $(addprefix scrub/, \ agheader_repair.o \ alloc_repair.o \ bitmap.o \ + bmap_repair.o \ ialloc_repair.o \ inode_repair.o \ refcount_repair.o \ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 72444b8b38a6..41e84c1bf7a9 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -15,6 +15,7 @@ #include "xfs_trans.h" #include "xfs_alloc.h" #include "xfs_btree.h" +#include "xfs_btree_staging.h" #include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_error.h" @@ -300,10 +301,7 @@ xfs_bmbt_get_minrecs( int level) { if (level == cur->bc_nlevels - 1) { - struct xfs_ifork *ifp; - - ifp = XFS_IFORK_PTR(cur->bc_ino.ip, - cur->bc_ino.whichfork); + struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); return xfs_bmbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, level == 0) / 2; @@ -318,10 +316,7 @@ xfs_bmbt_get_maxrecs( int level) { if (level == cur->bc_nlevels - 1) { - struct xfs_ifork *ifp; - - ifp = XFS_IFORK_PTR(cur->bc_ino.ip, - cur->bc_ino.whichfork); + struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); return xfs_bmbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, level == 0); @@ -541,40 +536,126 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { /* * Allocate a new bmap btree cursor. */ -struct xfs_btree_cur * /* new bmap btree cursor */ -xfs_bmbt_init_cursor( +static struct xfs_btree_cur * /* new bmap btree cursor */ +xfs_bmbt_init_common( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ - struct xfs_inode *ip, /* inode owning the btree */ - int whichfork) /* data or attr fork */ + struct xfs_inode *ip) /* inode owning the btree */ { - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_btree_cur *cur; - ASSERT(whichfork != XFS_COW_FORK); cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; cur->bc_mp = mp; - cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; cur->bc_btnum = XFS_BTNUM_BMAP; cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2); - cur->bc_ops = &xfs_bmbt_ops; cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE; if (xfs_has_crc(mp)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - cur->bc_ino.forksize = XFS_IFORK_SIZE(ip, whichfork); cur->bc_ino.ip = ip; cur->bc_ino.allocated = 0; cur->bc_ino.flags = 0; + cur->bc_ops = &xfs_bmbt_ops; + + return cur; +} + +/* + * Allocate a new bmap btree cursor. + */ +struct xfs_btree_cur * /* new bmap btree cursor */ +xfs_bmbt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* inode owning the btree */ + int whichfork) /* data or attr fork */ +{ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_btree_cur *cur; + ASSERT(whichfork != XFS_COW_FORK); + + cur = xfs_bmbt_init_common(mp, tp, ip); + + cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; + cur->bc_ino.forksize = XFS_IFORK_SIZE(ip, whichfork); cur->bc_ino.whichfork = whichfork; return cur; } +/* + * Allocate a new bmap btree cursor for reloading an inode block mapping data + * structure. Note that callers can use the staged cursor to reload extents + * format inode forks if they rebuild the iext tree and commit the staged + * cursor immediately. + */ +struct xfs_btree_cur * +xfs_bmbt_stage_cursor( + struct xfs_mount *mp, + struct xfs_inode *ip, + struct xbtree_ifakeroot *ifake) +{ + struct xfs_btree_cur *cur; + struct xfs_btree_ops *ops; + + cur = xfs_bmbt_init_common(mp, NULL, ip); + cur->bc_nlevels = ifake->if_levels; + cur->bc_ino.forksize = ifake->if_fork_size; + cur->bc_ino.whichfork = -1; + xfs_btree_stage_ifakeroot(cur, ifake, &ops); + ops->update_cursor = NULL; + return cur; +} + +/* + * Swap in the new inode fork root. Once we pass this point the newly rebuilt + * mappings are in place and we have to kill off any old btree blocks. + */ +void +xfs_bmbt_commit_staged_btree( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + int whichfork) +{ + struct xbtree_ifakeroot *ifake = cur->bc_ino.ifake; + struct xfs_ifork *ifp; + static const short brootflag[2] = + { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; + static const short extflag[2] = + { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; + int flags = XFS_ILOG_CORE; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + ASSERT(whichfork != XFS_COW_FORK); + + /* + * Free any resources hanging off the real fork, then shallow-copy the + * staging fork's contents into the real fork to transfer everything + * we just built. + */ + ifp = XFS_IFORK_PTR(cur->bc_ino.ip, whichfork); + xfs_idestroy_fork(ifp); + memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork)); + + switch (ifp->if_format) { + case XFS_DINODE_FMT_EXTENTS: + flags |= extflag[whichfork]; + break; + case XFS_DINODE_FMT_BTREE: + flags |= brootflag[whichfork]; + break; + default: + ASSERT(0); + break; + } + xfs_trans_log_inode(tp, cur->bc_ino.ip, flags); + xfs_btree_commit_ifakeroot(cur, tp, whichfork, &xfs_bmbt_ops); +} + /* * Calculate number of records in a bmap btree block. */ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 729e3bc569be..55796051760c 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -11,6 +11,7 @@ struct xfs_btree_block; struct xfs_mount; struct xfs_inode; struct xfs_trans; +struct xbtree_ifakeroot; /* * Btree block header size depends on a superblock flag. @@ -106,6 +107,10 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); +struct xfs_btree_cur *xfs_bmbt_stage_cursor(struct xfs_mount *mp, + struct xfs_inode *ip, struct xbtree_ifakeroot *ifake); +void xfs_bmbt_commit_staged_btree(struct xfs_btree_cur *cur, + struct xfs_trans *tp, int whichfork); extern unsigned long long xfs_bmbt_calc_size(struct xfs_mount *mp, unsigned long long len); diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c index ac9e80152b5c..6565439d7a46 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.c +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -384,7 +384,7 @@ xfs_btree_bload_prep_block( ASSERT(*bpp == NULL); /* Allocate a new incore btree root block. */ - new_size = bbl->iroot_size(cur, nr_this_block, priv); + new_size = bbl->iroot_size(cur, level, nr_this_block, priv); ifp->if_broot = kmem_zalloc(new_size, 0); ifp->if_broot_bytes = (int)new_size; @@ -570,7 +570,14 @@ xfs_btree_bload_level_geometry( unsigned int desired_npb; unsigned int maxnr; - maxnr = cur->bc_ops->get_maxrecs(cur, level); + /* + * Compute the absolute maximum number of records that we can store in + * the ondisk block or inode root. + */ + if (cur->bc_ops->get_dmaxrecs) + maxnr = cur->bc_ops->get_dmaxrecs(cur, level); + else + maxnr = cur->bc_ops->get_maxrecs(cur, level); /* * Compute the number of blocks we need to fill each block with the diff --git a/fs/xfs/libxfs/xfs_btree_staging.h b/fs/xfs/libxfs/xfs_btree_staging.h index f0d2976050ae..56863e2999a2 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.h +++ b/fs/xfs/libxfs/xfs_btree_staging.h @@ -37,12 +37,6 @@ struct xbtree_ifakeroot { /* Number of bytes available for this fork in the inode. */ unsigned int if_fork_size; - - /* Fork format. */ - unsigned int if_format; - - /* Number of records. */ - unsigned int if_extents; }; /* Cursor interactions with fake roots for inode-rooted btrees. */ @@ -57,7 +51,7 @@ typedef int (*xfs_btree_bload_get_record_fn)(struct xfs_btree_cur *cur, void *pr typedef int (*xfs_btree_bload_claim_block_fn)(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, void *priv); typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur, - unsigned int nr_this_level, void *priv); + unsigned int level, unsigned int nr_this_level, void *priv); struct xfs_btree_bload { /* diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 773cf4349428..d062794cc795 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -622,13 +622,11 @@ static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp) } void -xfs_iext_insert( - struct xfs_inode *ip, +xfs_iext_insert_raw( + struct xfs_ifork *ifp, struct xfs_iext_cursor *cur, - struct xfs_bmbt_irec *irec, - int state) + struct xfs_bmbt_irec *irec) { - struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); xfs_fileoff_t offset = irec->br_startoff; struct xfs_iext_leaf *new = NULL; int nr_entries, i; @@ -662,12 +660,23 @@ xfs_iext_insert( xfs_iext_set(cur_rec(cur), irec); ifp->if_bytes += sizeof(struct xfs_iext_rec); - trace_xfs_iext_insert(ip, cur, state, _RET_IP_); - if (new) xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); } +void +xfs_iext_insert( + struct xfs_inode *ip, + struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *irec, + int state) +{ + struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); + + xfs_iext_insert_raw(ifp, cur, irec); + trace_xfs_iext_insert(ip, cur, state, _RET_IP_); +} + static struct xfs_iext_node * xfs_iext_rebalance_node( struct xfs_iext_node *parent, diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 1d174909f9bd..396e82365888 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -507,6 +507,7 @@ xfs_idata_realloc( ifp->if_bytes = new_size; } +/* Free all memory and reset a fork back to its initial state. */ void xfs_idestroy_fork( struct xfs_ifork *ifp) diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index a6f7897b6887..5a6f4c4b1c1e 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -152,6 +152,9 @@ void xfs_init_local_fork(struct xfs_inode *ip, int whichfork, const void *data, int64_t size); xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp); +void xfs_iext_insert_raw(struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *irec); void xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *, int); void xfs_iext_remove(struct xfs_inode *, struct xfs_iext_cursor *, diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index cd6ec11c5b24..1ee080608c1f 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -29,6 +29,7 @@ int xchk_setup_inode_bmap( struct xfs_scrub *sc) { + bool is_repair = false; int error; error = xchk_get_inode(sc); @@ -37,6 +38,10 @@ xchk_setup_inode_bmap( xchk_ilock(sc, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); +#ifdef CONFIG_XFS_REPAIR + is_repair = (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR); +#endif + /* * We don't want any ephemeral data fork updates sitting around * while we inspect block mappings, so wait for directio to finish @@ -46,6 +51,14 @@ xchk_setup_inode_bmap( sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) { struct address_space *mapping = VFS_I(sc->ip)->i_mapping; + /* Break all our leases, we're going to mess with things. */ + if (is_repair) { + error = xfs_break_layouts(VFS_I(sc->ip), + &sc->ilock_flags, BREAK_UNMAP); + if (error) + goto out; + } + inode_dio_wait(VFS_I(sc->ip)); /* @@ -66,6 +79,15 @@ xchk_setup_inode_bmap( error = filemap_fdatawait_keep_errors(mapping); if (error && (error != -ENOSPC && error != -EIO)) goto out; + + /* Drop the page cache if we're repairing block mappings. */ + if (is_repair) { + error = invalidate_inode_pages2( + VFS_I(sc->ip)->i_mapping); + if (error) + goto out; + } + } /* Got the inode, lock it and we're ready to go. */ diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c new file mode 100644 index 000000000000..08e50b3f3ec8 --- /dev/null +++ b/fs/xfs/scrub/bmap_repair.c @@ -0,0 +1,639 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2021 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_inode_fork.h" +#include "xfs_alloc.h" +#include "xfs_rtalloc.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" +#include "xfs_rmap.h" +#include "xfs_rmap_btree.h" +#include "xfs_refcount.h" +#include "xfs_quota.h" +#include "xfs_ialloc.h" +#include "xfs_ag.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/btree.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/bitmap.h" +#include "scrub/xfarray.h" + +/* + * Inode Fork Block Mapping (BMBT) Repair + * ====================================== + * + * Gather all the rmap records for the inode and fork we're fixing, reset the + * incore fork, then recreate the btree. + */ +struct xrep_bmap { + /* Old bmbt blocks */ + struct xbitmap old_bmbt_blocks; + + /* New fork. */ + struct xrep_newbt new_fork_info; + struct xfs_btree_bload bmap_bload; + + /* List of new bmap records. */ + struct xfarray *bmap_records; + + struct xfs_scrub *sc; + + /* How many blocks did we find allocated to this file? */ + xfs_rfsblock_t nblocks; + + /* How many bmbt blocks did we find for this fork? */ + xfs_rfsblock_t old_bmbt_block_count; + + /* get_record()'s position in the free space record array. */ + uint64_t iter; + + /* Which fork are we fixing? */ + int whichfork; +}; + +/* Remember this reverse-mapping as a series of bmap records. */ +STATIC int +xrep_bmap_from_rmap( + struct xrep_bmap *rb, + xfs_fileoff_t startoff, + xfs_fsblock_t startblock, + xfs_filblks_t blockcount, + bool unwritten) +{ + struct xfs_bmbt_rec rbe; + struct xfs_bmbt_irec irec; + int error = 0; + + irec.br_startoff = startoff; + irec.br_startblock = startblock; + irec.br_state = unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM; + + do { + irec.br_blockcount = min_t(xfs_filblks_t, blockcount, + MAXEXTLEN); + xfs_bmbt_disk_set_all(&rbe, &irec); + + trace_xrep_bmap_found(rb->sc->ip, rb->whichfork, &irec); + + if (xchk_should_terminate(rb->sc, &error)) + return error; + + error = xfarray_append(rb->bmap_records, &rbe); + if (error) + return error; + + irec.br_startblock += irec.br_blockcount; + irec.br_startoff += irec.br_blockcount; + blockcount -= irec.br_blockcount; + } while (blockcount > 0); + + return 0; +} + +/* Check for any obvious errors or conflicts in the file mapping. */ +STATIC int +xrep_bmap_check_fork_rmap( + struct xrep_bmap *rb, + const struct xfs_rmap_irec *rec) +{ + struct xfs_scrub *sc = rb->sc; + bool is_freesp, has_inodes; + int error; + + /* Data extents for rt files are never stored on the data device. */ + if (XFS_IS_REALTIME_INODE(sc->ip) && + !(rec->rm_flags & XFS_RMAP_ATTR_FORK)) + return -EFSCORRUPTED; + + /* Check the file offsets and physical extents. */ + if (!xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount)) + return -EFSCORRUPTED; + + /* Check that this is within the AG. */ + if (!xfs_verify_agbext(sc->mp, sc->sa.pag->pag_agno, rec->rm_startblock, + rec->rm_blockcount)) + return -EFSCORRUPTED; + + /* Make sure this isn't free space. */ + error = xfs_alloc_has_record(sc->sa.bno_cur, rec->rm_startblock, + rec->rm_blockcount, &is_freesp); + if (error) + return error; + if (is_freesp) + return -EFSCORRUPTED; + + /* Must not be an inode chunk. */ + error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur, + rec->rm_startblock, rec->rm_blockcount, &has_inodes); + if (error) + return error; + if (has_inodes) + return -EFSCORRUPTED; + + return 0; +} + +/* Remember any old bmbt blocks we find so we can delete them later. */ +STATIC int +xrep_bmap_record_old_bmbt_blocks( + struct xrep_bmap *rb, + const struct xfs_rmap_irec *rec) +{ + struct xfs_scrub *sc = rb->sc; + struct xfs_mount *mp = sc->mp; + xfs_fsblock_t fsbno; + + if (!xfs_verify_agbext(mp, sc->sa.pag->pag_agno, rec->rm_startblock, + rec->rm_blockcount)) + return -EFSCORRUPTED; + + rb->nblocks += rec->rm_blockcount; + + fsbno = XFS_AGB_TO_FSB(mp, sc->sa.pag->pag_agno, rec->rm_startblock); + rb->old_bmbt_block_count += rec->rm_blockcount; + return xbitmap_set(&rb->old_bmbt_blocks, fsbno, rec->rm_blockcount); +} + +/* Record extents that belong to this inode's fork. */ +STATIC int +xrep_bmap_walk_rmap( + struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *rec, + void *priv) +{ + struct xrep_bmap *rb = priv; + struct xfs_mount *mp = cur->bc_mp; + xfs_fsblock_t fsbno; + int error = 0; + + if (xchk_should_terminate(rb->sc, &error)) + return error; + + if (rec->rm_owner != rb->sc->ip->i_ino) + return 0; + + if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) + return xrep_bmap_record_old_bmbt_blocks(rb, rec); + + error = xrep_bmap_check_fork_rmap(rb, rec); + if (error) + return error; + + /* + * Record all blocks allocated to this file even if the extent isn't + * for the fork we're rebuilding so that we can reset di_nblocks later. + */ + rb->nblocks += rec->rm_blockcount; + + /* If this rmap isn't for the fork we want, we're done. */ + if (rb->whichfork == XFS_DATA_FORK && + (rec->rm_flags & XFS_RMAP_ATTR_FORK)) + return 0; + if (rb->whichfork == XFS_ATTR_FORK && + !(rec->rm_flags & XFS_RMAP_ATTR_FORK)) + return 0; + + fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, + rec->rm_startblock); + return xrep_bmap_from_rmap(rb, rec->rm_offset, fsbno, + rec->rm_blockcount, + rec->rm_flags & XFS_RMAP_UNWRITTEN); +} + +/* Compare two bmap extents. */ +static int +xrep_bmap_extent_cmp( + const void *a, + const void *b) +{ + xfs_fileoff_t ao; + xfs_fileoff_t bo; + + ao = xfs_bmbt_disk_get_startoff((struct xfs_bmbt_rec *)a); + bo = xfs_bmbt_disk_get_startoff((struct xfs_bmbt_rec *)b); + + if (ao > bo) + return 1; + else if (ao < bo) + return -1; + return 0; +} + +/* Scan one AG for reverse mappings that we can turn into extent maps. */ +STATIC int +xrep_bmap_scan_ag( + struct xrep_bmap *rb, + struct xfs_perag *pag) +{ + struct xfs_scrub *sc = rb->sc; + int error; + + error = xrep_ag_init(sc, pag, &sc->sa); + if (error) + return error; + + error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_bmap_walk_rmap, rb); + xchk_ag_free(sc, &sc->sa); + return error; +} + +/* + * Collect block mappings for this fork of this inode and decide if we have + * enough space to rebuild. Caller is responsible for cleaning up the list if + * anything goes wrong. + */ +STATIC int +xrep_bmap_find_mappings( + struct xrep_bmap *rb) +{ + struct xfs_scrub *sc = rb->sc; + struct xfs_perag *pag; + xfs_agnumber_t agno; + int error = 0; + + /* Iterate the rmaps for extents. */ + for_each_perag(sc->mp, agno, pag) { + error = xrep_bmap_scan_ag(rb, pag); + if (error) { + xfs_perag_put(pag); + return error; + } + } + + return 0; +} + +/* Retrieve bmap data for bulk load. */ +STATIC int +xrep_bmap_get_record( + struct xfs_btree_cur *cur, + void *priv) +{ + struct xfs_bmbt_rec rec; + struct xfs_bmbt_irec *irec = &cur->bc_rec.b; + struct xrep_bmap *rb = priv; + int error; + + error = xfarray_load_next(rb->bmap_records, &rb->iter, &rec); + if (error) + return error; + + xfs_bmbt_disk_get_all(&rec, irec); + return 0; +} + +/* Feed one of the new btree blocks to the bulk loader. */ +STATIC int +xrep_bmap_claim_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + void *priv) +{ + struct xrep_bmap *rb = priv; + int error; + + error = xrep_newbt_relog_efis(&rb->new_fork_info); + if (error) + return error; + + return xrep_newbt_claim_block(cur, &rb->new_fork_info, ptr); +} + +/* Figure out how much space we need to create the incore btree root block. */ +STATIC size_t +xrep_bmap_iroot_size( + struct xfs_btree_cur *cur, + unsigned int level, + unsigned int nr_this_level, + void *priv) +{ + ASSERT(level > 0); + + return XFS_BMAP_BROOT_SPACE_CALC(cur->bc_mp, nr_this_level); +} + +/* Update the inode counters. */ +STATIC int +xrep_bmap_reset_counters( + struct xrep_bmap *rb) +{ + struct xfs_scrub *sc = rb->sc; + struct xbtree_ifakeroot *ifake = &rb->new_fork_info.ifake; + int64_t delta; + + /* + * Update the inode block counts to reflect the extents we found in the + * rmapbt. + */ + delta = ifake->if_blocks - rb->old_bmbt_block_count; + sc->ip->i_nblocks = rb->nblocks + delta; + xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); + + /* + * Adjust the quota counts by the difference in size between the old + * and new bmbt. + */ + xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT, delta); + return 0; +} + +/* Create a new iext tree and load it with block mappings. */ +STATIC int +xrep_bmap_extents_load( + struct xrep_bmap *rb, + struct xfs_btree_cur *bmap_cur) +{ + struct xfs_iext_cursor icur; + struct xbtree_ifakeroot *ifake = &rb->new_fork_info.ifake; + struct xfs_ifork *ifp = ifake->if_fork; + unsigned int i; + int error; + + ASSERT(ifp->if_bytes == 0); + + /* Add all the records to the incore extent tree. */ + rb->iter = 0; + xfs_iext_first(ifp, &icur); + for (i = 0; i < ifp->if_nextents; i++) { + error = xrep_bmap_get_record(bmap_cur, rb); + if (error) + return error; + xfs_iext_insert_raw(ifp, &icur, &bmap_cur->bc_rec.b); + xfs_iext_next(ifp, &icur); + } + + return 0; +} + +/* Reserve new btree blocks and bulk load all the bmap records. */ +STATIC int +xrep_bmap_btree_load( + struct xrep_bmap *rb, + struct xfs_btree_cur *bmap_cur) +{ + struct xfs_scrub *sc = rb->sc; + struct xbtree_ifakeroot *ifake = &rb->new_fork_info.ifake; + int error; + + rb->bmap_bload.get_record = xrep_bmap_get_record; + rb->bmap_bload.claim_block = xrep_bmap_claim_block; + rb->bmap_bload.iroot_size = xrep_bmap_iroot_size; + xrep_bload_estimate_slack(sc, &rb->bmap_bload); + + /* Compute how many blocks we'll need. */ + error = xfs_btree_bload_compute_geometry(bmap_cur, &rb->bmap_bload, + ifake->if_fork->if_nextents); + if (error) + return error; + + /* + * Guess how many blocks we're going to need to rebuild an entire bmap + * from the number of extents we found, and pump up our transaction to + * have sufficient block reservation. + */ + error = xfs_trans_reserve_more_inode(sc->tp, sc->ip, + rb->bmap_bload.nr_blocks, 0); + if (error) + return error; + + /* Reserve the space we'll need for the new btree. */ + error = xrep_newbt_alloc_blocks(&rb->new_fork_info, + rb->bmap_bload.nr_blocks); + if (error) + return error; + + /* Add all observed bmap records. */ + rb->iter = 0; + return xfs_btree_bload(bmap_cur, &rb->bmap_bload, rb); +} + +/* + * Use the collected bmap information to stage a new bmap fork. If this is + * successful we'll return with the new fork information logged to the repair + * transaction but not yet committed. The caller must ensure that the inode + * is joined to the transaction; the inode will be joined to a clean + * transaction when the function returns. + */ +STATIC int +xrep_bmap_build_new_fork( + struct xrep_bmap *rb) +{ + struct xfs_owner_info oinfo; + struct xfs_scrub *sc = rb->sc; + struct xfs_btree_cur *bmap_cur; + struct xbtree_ifakeroot *ifake = &rb->new_fork_info.ifake; + int error; + + /* + * Sort the bmap extents by startblock to avoid btree splits when we + * rebuild the bmbt btree. + */ + error = xfarray_sort(rb->bmap_records, xrep_bmap_extent_cmp); + if (error) + return error; + + /* + * Prepare to construct the new fork by initializing the new btree + * structure and creating a fake ifork in the ifakeroot structure. + */ + xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork); + xrep_newbt_init_inode(&rb->new_fork_info, sc, rb->whichfork, &oinfo); + bmap_cur = xfs_bmbt_stage_cursor(sc->mp, sc->ip, ifake); + + /* + * Figure out the size and format of the new fork, then fill it with + * all the bmap records we've found. Join the inode to the transaction + * so that we can roll the transaction while holding the inode locked. + */ + ifake->if_fork->if_nextents = xfarray_length(rb->bmap_records); + if (XFS_BMDR_SPACE_CALC(ifake->if_fork->if_nextents) <= + XFS_IFORK_SIZE(sc->ip, rb->whichfork)) { + ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS; + error = xrep_bmap_extents_load(rb, bmap_cur); + } else { + ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE; + error = xrep_bmap_btree_load(rb, bmap_cur); + } + if (error) + goto err_cur; + + /* + * Install the new fork in the inode. After this point the old mapping + * data are no longer accessible and the new tree is live. We delete + * the cursor immediately after committing the staged root because the + * staged fork might be in extents format. + */ + xfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork); + xfs_btree_del_cursor(bmap_cur, 0); + + /* Reset the inode counters now that we've changed the fork. */ + error = xrep_bmap_reset_counters(rb); + if (error) + goto err_newbt; + + /* Dispose of any unused blocks and the accounting information. */ + xrep_newbt_destroy(&rb->new_fork_info, error); + return xrep_roll_trans(sc); + +err_cur: + if (bmap_cur) + xfs_btree_del_cursor(bmap_cur, error); +err_newbt: + xrep_newbt_destroy(&rb->new_fork_info, error); + return error; +} + +/* + * Now that we've logged the new inode btree, invalidate all of the old blocks + * and free them, if there were any. + */ +STATIC int +xrep_bmap_remove_old_tree( + struct xrep_bmap *rb) +{ + struct xfs_scrub *sc = rb->sc; + struct xfs_owner_info oinfo; + + /* Free the old bmbt blocks if they're not in use. */ + xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork); + return xrep_reap_extents(sc, &rb->old_bmbt_blocks, &oinfo, + XFS_AG_RESV_NONE); +} + +/* Check for garbage inputs. */ +STATIC int +xrep_bmap_check_inputs( + struct xfs_scrub *sc, + int whichfork) +{ + struct xfs_ifork *ifp = XFS_IFORK_PTR(sc->ip, whichfork); + + ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK); + + /* No fork means nothing to rebuild. */ + if (!ifp) + return -ENOENT; + + /* + * Don't know how to repair the other fork formats. Scrub should + * never ask us to repair a local/uuid/dev format fork, so this is + * "theoretically" impossible. + */ + if (ifp->if_format != XFS_DINODE_FMT_EXTENTS && + ifp->if_format != XFS_DINODE_FMT_BTREE) + return -EOPNOTSUPP; + + if (whichfork == XFS_ATTR_FORK) + return 0; + + /* Only files, symlinks, and directories get to have data forks. */ + switch (VFS_I(sc->ip)->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + /* ok */ + break; + default: + return -EINVAL; + } + + /* If we somehow have delalloc extents, forget it. */ + if (sc->ip->i_delayed_blks) + return -EBUSY; + + /* Don't know how to rebuild realtime data forks. */ + if (XFS_IS_REALTIME_INODE(sc->ip)) + return -EOPNOTSUPP; + + return 0; +} + +/* Repair an inode fork. */ +STATIC int +xrep_bmap( + struct xfs_scrub *sc, + int whichfork) +{ + struct xrep_bmap *rb; + int error = 0; + + error = xrep_bmap_check_inputs(sc, whichfork); + if (error) + return error; + + rb = kmem_zalloc(sizeof(struct xrep_bmap), KM_NOFS | KM_MAYFAIL); + if (!rb) + return -ENOMEM; + rb->sc = sc; + rb->whichfork = whichfork; + + /* Set up some storage */ + rb->bmap_records = xfarray_create("bmap records", + sizeof(struct xfs_bmbt_rec)); + if (IS_ERR(rb->bmap_records)) { + error = PTR_ERR(rb->bmap_records); + goto out_rb; + } + + /* Collect all reverse mappings for this fork's extents. */ + xbitmap_init(&rb->old_bmbt_blocks); + error = xrep_bmap_find_mappings(rb); + if (error) + goto out_bitmap; + + /* Attach quotas and inode to transaction. */ + xfs_trans_ijoin(sc->tp, sc->ip, 0); + error = xrep_ino_dqattach(sc); + if (error) + return error; + + /* Rebuild the bmap information. */ + error = xrep_bmap_build_new_fork(rb); + if (error) + goto out_bitmap; + + /* Kill the old tree. */ + error = xrep_bmap_remove_old_tree(rb); + +out_bitmap: + xbitmap_destroy(&rb->old_bmbt_blocks); + xfarray_destroy(rb->bmap_records); +out_rb: + kmem_free(rb); + return error; +} + +/* Repair an inode's data fork. */ +int +xrep_bmap_data( + struct xfs_scrub *sc) +{ + return xrep_bmap(sc, XFS_DATA_FORK); +} + +/* Repair an inode's attr fork. */ +int +xrep_bmap_attr( + struct xfs_scrub *sc) +{ + return xrep_bmap(sc, XFS_ATTR_FORK); +} diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 20667b8b2754..badd716eea75 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -1564,6 +1564,39 @@ xrep_ag_btcur_init( sa->agf_bp, sc->sa.pag); } +/* Given a reference to a perag structure, load AG headers and cursors. */ +int +xrep_ag_init( + struct xfs_scrub *sc, + struct xfs_perag *pag, + struct xchk_ag *sa) +{ + int error; + + ASSERT(!sa->pag); + + error = xfs_ialloc_read_agi(sc->mp, sc->tp, pag->pag_agno, + &sa->agi_bp); + if (error) + return error; + + error = xfs_alloc_read_agf(sc->mp, sc->tp, pag->pag_agno, 0, + &sa->agf_bp); + if (error) + return error; + + error = xfs_alloc_read_agfl(sc->mp, sc->tp, pag->pag_agno, + &sa->agfl_bp); + if (error) + return error; + + /* Grab our own reference to the perag structure. */ + atomic_inc(&pag->pag_ref); + sa->pag = pag; + xrep_ag_btcur_init(sc, sa); + return 0; +} + /* Reinitialize the per-AG block reservation for the AG we just fixed. */ int xrep_reset_perag_resv( diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index fa567b6eefdc..09750bc658c3 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -71,6 +71,8 @@ int xrep_ino_dqattach(struct xfs_scrub *sc); int xrep_reset_perag_resv(struct xfs_scrub *sc); void xrep_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa); +int xrep_ag_init(struct xfs_scrub *sc, struct xfs_perag *pag, + struct xchk_ag *sa); /* Metadata revalidators */ @@ -88,6 +90,8 @@ int xrep_allocbt(struct xfs_scrub *sc); int xrep_iallocbt(struct xfs_scrub *sc); int xrep_refcountbt(struct xfs_scrub *sc); int xrep_inode(struct xfs_scrub *sc); +int xrep_bmap_data(struct xfs_scrub *sc); +int xrep_bmap_attr(struct xfs_scrub *sc); struct xrep_newbt_resv { /* Link to list of extents that we've reserved. */ @@ -187,6 +191,8 @@ xrep_reset_perag_resv( #define xrep_iallocbt xrep_notsupported #define xrep_refcountbt xrep_notsupported #define xrep_inode xrep_notsupported +#define xrep_bmap_data xrep_notsupported +#define xrep_bmap_attr xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index bf6a505eb77f..abd74fff3bf5 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -269,13 +269,13 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .type = ST_INODE, .setup = xchk_setup_inode_bmap, .scrub = xchk_bmap_data, - .repair = xrep_notsupported, + .repair = xrep_bmap_data, }, [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */ .type = ST_INODE, .setup = xchk_setup_inode_bmap, .scrub = xchk_bmap_attr, - .repair = xrep_notsupported, + .repair = xrep_bmap_attr, }, [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */ .type = ST_INODE, diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index bc1c3feb3a12..25b068714636 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -901,7 +901,7 @@ DEFINE_EVENT(xrep_rmap_class, name, \ TP_ARGS(mp, agno, agbno, len, owner, offset, flags)) DEFINE_REPAIR_RMAP_EVENT(xrep_ibt_walk_rmap); DEFINE_REPAIR_RMAP_EVENT(xrep_rmap_extent_fn); -DEFINE_REPAIR_RMAP_EVENT(xrep_bmap_extent_fn); +DEFINE_REPAIR_RMAP_EVENT(xrep_bmap_walk_rmap); TRACE_EVENT(xrep_abt_found, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, @@ -984,6 +984,38 @@ TRACE_EVENT(xrep_refc_found, __entry->refcount) ) +TRACE_EVENT(xrep_bmap_found, + TP_PROTO(struct xfs_inode *ip, int whichfork, + struct xfs_bmbt_irec *irec), + TP_ARGS(ip, whichfork, irec), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, whichfork) + __field(xfs_fileoff_t, lblk) + __field(xfs_filblks_t, len) + __field(xfs_fsblock_t, pblk) + __field(int, state) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->whichfork = whichfork; + __entry->lblk = irec->br_startoff; + __entry->len = irec->br_blockcount; + __entry->pblk = irec->br_startblock; + __entry->state = irec->br_state; + ), + TP_printk("dev %d:%d ino 0x%llx whichfork %s fileoff 0x%llx fsbcount 0x%llx startblock 0x%llx state %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), + __entry->lblk, + __entry->len, + __entry->pblk, + __entry->state) +); + TRACE_EVENT(xrep_init_btblock, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_btnum_t btnum), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 67dec11e34c7..4e321bb81991 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -134,6 +134,62 @@ xfs_trans_dup( return ntp; } +/* + * Try to reserve more blocks for a transaction. + * + * This is for callers that need to attach resources to a transaction, scan + * those resources to determine the space reservation requirements, and then + * modify the attached resources. In other words, online repair. This can + * fail due to ENOSPC, so the caller must be able to cancel the transaction + * without shutting down the fs. + */ +int +xfs_trans_reserve_more( + struct xfs_trans *tp, + unsigned int blocks, + unsigned int rtextents) +{ + struct xfs_mount *mp = tp->t_mountp; + bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; + int error = 0; + + ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY)); + + /* + * Attempt to reserve the needed disk blocks by decrementing + * the number needed from the number available. This will + * fail if the count would go below zero. + */ + if (blocks > 0) { + error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd); + if (error) + return -ENOSPC; + tp->t_blk_res += blocks; + } + + /* + * Attempt to reserve the needed realtime extents by decrementing + * the number needed from the number available. This will + * fail if the count would go below zero. + */ + if (rtextents > 0) { + error = xfs_mod_frextents(mp, -((int64_t)rtextents)); + if (error) { + error = -ENOSPC; + goto out_blocks; + } + tp->t_rtx_res += rtextents; + } + + return 0; +out_blocks: + if (blocks > 0) { + xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd); + tp->t_blk_res -= blocks; + } + return error; +} + /* * This is called to reserve free disk blocks and log space for the * given transaction. This must be done before allocating any resources @@ -1080,6 +1136,38 @@ out_cancel: return error; } + +/* Try to reserve more blocks and file quota for a transaction. */ +int +xfs_trans_reserve_more_inode( + struct xfs_trans *tp, + struct xfs_inode *ip, + unsigned int dblocks, + unsigned int rblocks) +{ + struct xfs_mount *mp = ip->i_mount; + unsigned int rtx = rblocks / mp->m_sb.sb_rextsize; + bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; + int error; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + error = xfs_trans_reserve_more(tp, dblocks, rtx); + if (error) + return error; + + error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks, rsvd); + if (!error) + return 0; + + /* Quota failed, give back the new reservation. */ + xfs_mod_fdblocks(mp, dblocks, rsvd); + tp->t_blk_res -= dblocks; + xfs_mod_frextents(mp, rtx); + tp->t_rtx_res -= rtx; + return error; +} + /* * Allocate an transaction in preparation for inode creation by reserving quota * against the given dquots. Callers are not required to hold any inode locks. diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 50da47f23a07..83912d97b403 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -166,6 +166,8 @@ typedef struct xfs_trans { int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp, uint blocks, uint rtextents, uint flags, struct xfs_trans **tpp); +int xfs_trans_reserve_more(struct xfs_trans *tp, + unsigned int blocks, unsigned int rtextents); int xfs_trans_alloc_empty(struct xfs_mount *mp, struct xfs_trans **tpp); void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); @@ -258,6 +260,8 @@ struct xfs_dquot; int xfs_trans_alloc_inode(struct xfs_inode *ip, struct xfs_trans_res *resv, unsigned int dblocks, unsigned int rblocks, bool force, struct xfs_trans **tpp); +int xfs_trans_reserve_more_inode(struct xfs_trans *tp, struct xfs_inode *ip, + unsigned int dblocks, unsigned int rblocks); int xfs_trans_alloc_icreate(struct xfs_mount *mp, struct xfs_trans_res *resv, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, unsigned int dblocks, -- cgit v1.2.3