diff options
Diffstat (limited to 'fs/xfs/scrub/fscounters.c')
-rw-r--r-- | fs/xfs/scrub/fscounters.c | 294 |
1 files changed, 294 insertions, 0 deletions
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c new file mode 100644 index 000000000000..6afe246a20f1 --- /dev/null +++ b/fs/xfs/scrub/fscounters.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2018 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" +#include "xfs_rmap.h" +#include "xfs_error.h" +#include "xfs_errortag.h" +#include "xfs_icache.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/repair.h" + +/* + * FS Summary Counters + * =================== + * + * Filesystem summary counters are a tricky beast to check. We cannot have + * anyone changing the superblock fields, the percpu counters, or the AG + * headers while we do the global check. This means that we must freeze the + * filesystem for the entire duration. Once that's done, we compute what the + * incore counters /should/ be based on the counters in the AG headers + * (presumably we checked those in an earlier part of scrub) and the in-core + * free space reservations (both the user-changeable one and the per-AG ones). + * + * From there we compare the computed incore counts to the actual ones and + * complain if they're off. For repair we compute the deltas needed to + * correct the counters and then update the incore and ondisk counters + * accordingly. + */ + +/* Summary counter checks require a frozen fs. */ +int +xchk_setup_fscounters( + struct xfs_scrub *sc, + struct xfs_inode *ip) +{ + int error; + + /* Save counters across runs. */ + sc->buf = kmem_zalloc(sizeof(struct xchk_fscounters), KM_SLEEP); + if (!sc->buf) + return -ENOMEM; + + /* + * We need to prevent any other thread from changing the global fs + * summary counters while we're scrubbing or repairing them. This + * requires the fs to be frozen. + * + * Scrub can do some basic sanity checks if userspace does not permit + * us to freeze the filesystem. + */ + if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && + !(sc->sm->sm_flags & XFS_SCRUB_IFLAG_FREEZE_OK)) + return -EUSERS; + + /* + * Make sure we've purged every inactive inode in the system because + * our live inode walker won't touch anything that's in reclaim. + */ + xfs_inactive_force(sc->mp); + + if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FREEZE_OK) { + error = xfs_scrub_fs_freeze(sc); + if (error) + return error; + } + + /* Set up the scrub context. */ + return xchk_trans_alloc(sc, 0); +} + +/* + * Record the number of blocks reserved for this inode for future writes but + * not yet allocated to real space. In other words, we're looking for all + * subtractions from fdblocks that aren't backed by actual space allocations + * while we recalculate fdlbocks. + */ +STATIC int +xchk_fscounters_count_del( + struct xfs_inode *ip, + void *priv) +{ + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec rec; + struct xfs_ifork *ifp; + uint64_t *d = priv; + int64_t delblks = ip->i_delayed_blks; + + if (delblks == 0) + return 0; + + /* Add the indlen blocks for each data fork reservation. */ + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + for_each_xfs_iext(ifp, &icur, &rec) { + if (!isnullstartblock(rec.br_startblock)) + continue; + delblks += startblockval(rec.br_startblock); + } + + /* + * Add the indlen blocks for each CoW fork reservation. Remember + * that we count real/unwritten extents in the CoW fork towards + * i_delayed_blks, so we have to subtract those. If it's a delalloc + * reservation, add the indlen blocks instead. + */ + ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + if (ifp) { + for_each_xfs_iext(ifp, &icur, &rec) { + if (isnullstartblock(rec.br_startblock)) + delblks += startblockval(rec.br_startblock); + else + delblks -= rec.br_blockcount; + } + } + + /* No, we can't have negative reservations. */ + if (delblks < 0) + return -EFSCORRUPTED; + + *d += delblks; + return 0; +} + +/* + * Calculate what the global in-core counters ought to be from the AG header + * contents. Callers can compare this to the actual in-core counters to + * calculate by how much both in-core and on-disk counters need to be + * adjusted. + */ +STATIC int +xchk_fscounters_calc( + struct xfs_scrub *sc, + struct xchk_fscounters *fsc) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_buf *agi_bp; + struct xfs_buf *agf_bp; + struct xfs_agi *agi; + struct xfs_agf *agf; + struct xfs_perag *pag; + uint64_t delayed = 0; + xfs_agnumber_t agno; + int error; + + ASSERT(sc->fs_frozen); + + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + /* Count all the inodes */ + error = xfs_ialloc_read_agi(mp, sc->tp, agno, &agi_bp); + if (error) + return error; + agi = XFS_BUF_TO_AGI(agi_bp); + fsc->icount += be32_to_cpu(agi->agi_count); + fsc->ifree += be32_to_cpu(agi->agi_freecount); + + /* Add up the free/freelist/bnobt/cntbt blocks */ + error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &agf_bp); + if (error) + return error; + if (!agf_bp) + return -ENOMEM; + agf = XFS_BUF_TO_AGF(agf_bp); + fsc->fdblocks += be32_to_cpu(agf->agf_freeblks); + fsc->fdblocks += be32_to_cpu(agf->agf_flcount); + fsc->fdblocks += be32_to_cpu(agf->agf_btreeblks); + + /* + * Per-AG reservations are taken out of the incore counters, + * so count them out. + */ + pag = xfs_perag_get(mp, agno); + fsc->fdblocks -= pag->pag_meta_resv.ar_reserved; + fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved; + xfs_perag_put(pag); + } + + /* + * The global space reservation is taken out of the incore counters, + * so count that out too. + */ + fsc->fdblocks -= mp->m_resblks_avail; + + /* + * Delayed allocation reservations are taken out of the incore counters + * but not recorded on disk, so count them out too. + */ + error = xfs_scrub_foreach_live_inode(sc, xchk_fscounters_count_del, + &delayed); + if (error) + return error; + fsc->fdblocks -= delayed; + + trace_xchk_fscounters_calc(mp, fsc->icount, fsc->ifree, + fsc->fdblocks, delayed); + + /* Bail out if the values we compute are totally nonsense. */ + if (!xfs_verify_icount(mp, fsc->icount) || + fsc->fdblocks > mp->m_sb.sb_dblocks || + fsc->ifree > fsc->icount) + return -EFSCORRUPTED; + + return 0; +} + +/* + * Check the superblock counters. + * + * The filesystem must be frozen so that the counters do not change while + * we're computing the summary counters. + */ +int +xchk_fscounters( + struct xfs_scrub *sc) +{ + struct xfs_mount *mp = sc->mp; + struct xchk_fscounters *fsc = sc->buf; + int error; + + /* See if icount is obviously wrong. */ + if (!xfs_verify_icount(mp, mp->m_sb.sb_icount)) + xchk_block_set_corrupt(sc, mp->m_sb_bp); + + /* See if fdblocks / ifree are obviously wrong. */ + if (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks) + xchk_block_set_corrupt(sc, mp->m_sb_bp); + if (mp->m_sb.sb_ifree > mp->m_sb.sb_icount) + xchk_block_set_corrupt(sc, mp->m_sb_bp); + + /* Did we already flag bad summary counters? */ + if (XFS_TEST_ERROR((mp->m_flags & XFS_MOUNT_BAD_SUMMARY), mp, + XFS_ERRTAG_FORCE_SUMMARY_RECALC)) + xchk_block_set_corrupt(sc, mp->m_sb_bp); + else if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + xfs_force_summary_recalc(sc->mp); + + /* + * If we're only checking for corruption and we found it, exit now. + * + * Repair depends on the counter values we collect here, so if the + * IFLAG_REPAIR flag is set we must continue to calculate the correct + * counter values. + */ + if (!(sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + return 0; + + /* Bail out if we need to be frozen to do the hard checks. */ + if (!sc->fs_frozen) { + xchk_set_incomplete(sc); + return -EUSERS; + } + + /* Counters seem ok, but let's count them. */ + error = xchk_fscounters_calc(sc, fsc); + if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(sc->mp), &error)) + return error; + + /* + * Compare the in-core counters. In theory we sync'd the superblock + * when we did the repair freeze, so they should be the same as the + * percpu counters. + */ + spin_lock(&mp->m_sb_lock); + if (mp->m_sb.sb_icount != fsc->icount) + xchk_block_set_corrupt(sc, mp->m_sb_bp); + if (mp->m_sb.sb_ifree != fsc->ifree) + xchk_block_set_corrupt(sc, mp->m_sb_bp); + if (mp->m_sb.sb_fdblocks != fsc->fdblocks) + xchk_block_set_corrupt(sc, mp->m_sb_bp); + spin_unlock(&mp->m_sb_lock); + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + xfs_force_summary_recalc(sc->mp); + + return 0; +} |