summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-09-01 10:46:48 -0700
committerDarrick J. Wong <djwong@kernel.org>2021-10-22 16:40:40 -0700
commit6b33040a567a1ddea1d2d6560a5e062a19d1732f (patch)
treec7814e040b61488a516e94b549247d583c63eecb /fs
parent52b4a83ce114626b05fda51fd0b935a5d77a4075 (diff)
xfs: ask to freeze if fscounters scrubber fails
If the fscounters scrubber notices incorrect summary counters, it's entirely possible that scrub is simply racing with other threads that are updating the incore counters. Therefore, if there's a mismatch and the fs isn't frozen, ask userspace if we can freeze the fs to eliminate the race condition. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/scrub/fscounters.c72
1 files changed, 48 insertions, 24 deletions
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 5761fa94964a..9d3edd931f5a 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -133,9 +133,16 @@ xchk_setup_fscounters(
/*
* Pause background reclaim while we're scrubbing to reduce the
* likelihood of background perturbations to the counters throwing off
- * our calculations.
+ * our calculations. If a previous check failed and userspace told us
+ * to freeze the fs, do that instead.
*/
- xchk_stop_reaping(sc);
+ if (sc->flags & XCHK_TRY_HARDER) {
+ error = xchk_fs_freeze(sc);
+ if (error)
+ return error;
+ } else {
+ xchk_stop_reaping(sc);
+ }
return xchk_trans_alloc(sc, 0);
}
@@ -262,8 +269,7 @@ retry:
if (fsc->ifree > fsc->icount) {
if (tries--)
goto retry;
- xchk_set_incomplete(sc);
- return 0;
+ return -EDEADLOCK;
}
return 0;
@@ -311,8 +317,12 @@ xchk_fscount_check_frextents(
trace_xchk_fscounters_frextents_within_range(sc->mp, fsc->frextents,
mp->m_sb.sb_frextents);
- if (fsc->frextents != mp->m_sb.sb_frextents)
- xchk_set_corrupt(sc);
+ if (fsc->frextents != mp->m_sb.sb_frextents) {
+ if (sc->flags & XCHK_FS_FROZEN)
+ xchk_set_corrupt(sc);
+ else
+ error = -EDEADLOCK;
+ }
spin_unlock(&mp->m_sb_lock);
out_unlock:
@@ -379,16 +389,9 @@ xchk_fscount_within_range(
* If the difference between the two summations is too large, the fs
* might just be busy and so we'll mark the scrub incomplete. Return
* true here so that we don't mark the counter corrupt.
- *
- * XXX: In the future when userspace can grant scrub permission to
- * quiesce the filesystem to solve the outsized variance problem, this
- * check should be moved up and the return code changed to signal to
- * userspace that we need quiesce permission.
*/
- if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
- xchk_set_incomplete(sc);
- return true;
- }
+ if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE)
+ return false;
return false;
}
@@ -401,6 +404,8 @@ xchk_fscounters(
struct xfs_mount *mp = sc->mp;
struct xchk_fscounters *fsc = sc->buf;
int64_t icount, ifree, fdblocks;
+ bool frozen = sc->flags & XCHK_FS_FROZEN;
+ bool try_again = false;
int error;
/* Snapshot the percpu counters. */
@@ -431,19 +436,38 @@ xchk_fscounters(
error = xchk_fscount_aggregate_agcounts(sc, fsc);
if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
return error;
- if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
- return 0;
- /* Compare the in-core counters with whatever we counted. */
- if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
- xchk_set_corrupt(sc);
+ /*
+ * Compare the in-core counters with whatever we counted. If the fs is
+ * frozen, we treat the discrepancy as a corruption because the freeze
+ * should have stabilized the counter values. Otherwise, we need
+ * userspace to call us back having granted us freeze permission.
+ */
+ if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
+ fsc->icount)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
- if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
- xchk_set_corrupt(sc);
+ if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
- fsc->fdblocks))
- xchk_set_corrupt(sc);
+ fsc->fdblocks)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
+
+ if (try_again)
+ return -EDEADLOCK;
/* Check the free extents counter for rt volumes. */
error = xchk_fscount_check_frextents(sc, fsc);