summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-01-05 17:43:53 -0800
committerDarrick J. Wong <djwong@kernel.org>2021-08-25 22:25:57 -0700
commit9de23feda7dcbf3357562337220b2cb26f95718d (patch)
treefaf3f05e664c80388ee39a5845daa72374ecfcb3
parentfbf773c456ffce4175554d29b9c2446898e3ab6c (diff)
xfs: ask to freeze if fscounters scrubber fails
If the fscounters scrubber notices incorrect summary counters, it's entirely possible that scrub is simply racing with other threads that are updating the incore counters. Therefore, if there's a mismatch and the fs isn't frozen, ask userspace if we can freeze the fs to eliminate the race condition. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/scrub/fscounters.c72
1 files changed, 48 insertions, 24 deletions
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 0a381e1c7383..a8794fb02b86 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -133,9 +133,16 @@ xchk_setup_fscounters(
/*
* Pause background reclaim while we're scrubbing to reduce the
* likelihood of background perturbations to the counters throwing off
- * our calculations.
+ * our calculations. If a previous check failed and userspace told us
+ * to freeze the fs, do that instead.
*/
- xchk_stop_reaping(sc);
+ if (sc->flags & XCHK_TRY_HARDER) {
+ error = xchk_fs_freeze(sc);
+ if (error)
+ return error;
+ } else {
+ xchk_stop_reaping(sc);
+ }
return xchk_trans_alloc(sc, 0);
}
@@ -262,8 +269,7 @@ retry:
if (fsc->ifree > fsc->icount) {
if (tries--)
goto retry;
- xchk_set_incomplete(sc);
- return 0;
+ return -EDEADLOCK;
}
return 0;
@@ -307,8 +313,12 @@ xchk_fscount_check_frextents(
trace_xchk_fscounters_frextents_within_range(sc->mp, fsc->frextents,
mp->m_sb.sb_frextents);
- if (fsc->frextents != mp->m_sb.sb_frextents)
- xchk_set_corrupt(sc);
+ if (fsc->frextents != mp->m_sb.sb_frextents) {
+ if (sc->flags & XCHK_FS_FROZEN)
+ xchk_set_corrupt(sc);
+ else
+ error = -EDEADLOCK;
+ }
spin_unlock(&mp->m_sb_lock);
out_unlock:
@@ -361,16 +371,9 @@ xchk_fscount_within_range(
* If the difference between the two summations is too large, the fs
* might just be busy and so we'll mark the scrub incomplete. Return
* true here so that we don't mark the counter corrupt.
- *
- * XXX: In the future when userspace can grant scrub permission to
- * quiesce the filesystem to solve the outsized variance problem, this
- * check should be moved up and the return code changed to signal to
- * userspace that we need quiesce permission.
*/
- if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
- xchk_set_incomplete(sc);
- return true;
- }
+ if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE)
+ return false;
return false;
}
@@ -383,6 +386,8 @@ xchk_fscounters(
struct xfs_mount *mp = sc->mp;
struct xchk_fscounters *fsc = sc->buf;
int64_t icount, ifree, fdblocks;
+ bool frozen = sc->flags & XCHK_FS_FROZEN;
+ bool try_again = false;
int error;
/* Snapshot the percpu counters. */
@@ -413,19 +418,38 @@ xchk_fscounters(
error = xchk_fscount_aggregate_agcounts(sc, fsc);
if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
return error;
- if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
- return 0;
- /* Compare the in-core counters with whatever we counted. */
- if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
- xchk_set_corrupt(sc);
+ /*
+ * Compare the in-core counters with whatever we counted. If the fs is
+ * frozen, we treat the discrepancy as a corruption because the freeze
+ * should have stabilized the counter values. Otherwise, we need
+ * userspace to call us back having granted us freeze permission.
+ */
+ if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
+ fsc->icount)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
- if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
- xchk_set_corrupt(sc);
+ if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
- fsc->fdblocks))
- xchk_set_corrupt(sc);
+ fsc->fdblocks)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
+
+ if (try_again)
+ return -EDEADLOCK;
/* Check the free extents counter for rt volumes. */
error = xchk_fscount_check_frextents(sc, fsc);