summary refs log tree commit diff
path: root/fs/xfs
diff options
context:
space:
mode:
author Darrick J. Wong <djwong@kernel.org> 2021-09-01 10:46:48 -0700
committer Darrick J. Wong <djwong@kernel.org> 2021-12-15 17:28:56 -0800
commit a0a7d98f904c240e570dd4b37311d29ca28d6793 (patch)
tree 7e71697dc6bce987b2b33cb8551d89a0cf3103f3 /fs/xfs
parent e66c39ccc50ff07d006a1bdddf4c4e4c3a92ff78 (diff)
xfs: ask to freeze if fscounters scrubber fails
If the fscounters scrubber notices incorrect summary counters, it's entirely possible that scrub is simply racing with other threads that are updating the incore counters. Therefore, if there's a mismatch and the fs isn't frozen, ask userspace if we can freeze the fs to eliminate the race condition.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- fs/xfs/scrub/fscounters.c 72
1 files changed, 48 insertions, 24 deletions
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 5761fa94964a..9d3edd931f5a 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -133,9 +133,16 @@ xchk_setup_fscounters(
/*
* Pause background reclaim while we're scrubbing to reduce the
* likelihood of background perturbations to the counters throwing off
- * our calculations.
+ * our calculations. If a previous check failed and userspace told us
+ * to freeze the fs, do that instead.
*/
- xchk_stop_reaping(sc);
+ if (sc->flags & XCHK_TRY_HARDER) {
+ error = xchk_fs_freeze(sc);
+ if (error)
+ return error;
+ } else {
+ xchk_stop_reaping(sc);
+ }
return xchk_trans_alloc(sc, 0);
}
@@ -262,8 +269,7 @@ retry:
if (fsc->ifree > fsc->icount) {
if (tries--)
goto retry;
- xchk_set_incomplete(sc);
- return 0;
+ return -EDEADLOCK;
}
return 0;
@@ -311,8 +317,12 @@ xchk_fscount_check_frextents(
trace_xchk_fscounters_frextents_within_range(sc->mp, fsc->frextents,
mp->m_sb.sb_frextents);
- if (fsc->frextents != mp->m_sb.sb_frextents)
- xchk_set_corrupt(sc);
+ if (fsc->frextents != mp->m_sb.sb_frextents) {
+ if (sc->flags & XCHK_FS_FROZEN)
+ xchk_set_corrupt(sc);
+ else
+ error = -EDEADLOCK;
+ }
spin_unlock(&mp->m_sb_lock);
out_unlock:
@@ -379,16 +389,9 @@ xchk_fscount_within_range(
* If the difference between the two summations is too large, the fs
* might just be busy and so we'll mark the scrub incomplete. Return
* true here so that we don't mark the counter corrupt.
- *
- * XXX: In the future when userspace can grant scrub permission to
- * quiesce the filesystem to solve the outsized variance problem, this
- * check should be moved up and the return code changed to signal to
- * userspace that we need quiesce permission.
*/
- if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
- xchk_set_incomplete(sc);
- return true;
- }
+ if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE)
+ return false;
return false;
}
@@ -401,6 +404,8 @@ xchk_fscounters(
struct xfs_mount *mp = sc->mp;
struct xchk_fscounters *fsc = sc->buf;
int64_t icount, ifree, fdblocks;
+ bool frozen = sc->flags & XCHK_FS_FROZEN;
+ bool try_again = false;
int error;
/* Snapshot the percpu counters. */
@@ -431,19 +436,38 @@ xchk_fscounters(
error = xchk_fscount_aggregate_agcounts(sc, fsc);
if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
return error;
- if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
- return 0;
- /* Compare the in-core counters with whatever we counted. */
- if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
- xchk_set_corrupt(sc);
+ /*
+ * Compare the in-core counters with whatever we counted. If the fs is
+ * frozen, we treat the discrepancy as a corruption because the freeze
+ * should have stabilized the counter values. Otherwise, we need
+ * userspace to call us back having granted us freeze permission.
+ */
+ if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
+ fsc->icount)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
- if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
- xchk_set_corrupt(sc);
+ if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
- fsc->fdblocks))
- xchk_set_corrupt(sc);
+ fsc->fdblocks)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
+
+ if (try_again)
+ return -EDEADLOCK;
/* Check the free extents counter for rt volumes. */
error = xchk_fscount_check_frextents(sc, fsc);