summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-01-05 17:43:53 -0800
committerDarrick J. Wong <djwong@kernel.org>2021-03-25 17:08:25 -0700
commit7ba69637d3380c4cc67f3a9481e7a2aa00942a3d (patch)
tree289aaf921402e9c997f9011fa93343199a07e76f
parent690885ed3461636b5a9d7c9dbe3352cd1eea2241 (diff)
xfs: ask to freeze if fscounters scrubber fails
If the fscounters scrubber notices incorrect summary counters, it's entirely possible that scrub is simply racing with other threads that are updating the incore counters. Therefore, if there's a mismatch and the fs isn't frozen, ask userspace if we can freeze the fs to eliminate the race condition. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/scrub/fscounters.c72
1 files changed, 48 insertions, 24 deletions
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index a4bc2db61b01..5149c6d4baa1 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -138,9 +138,16 @@ xchk_setup_fscounters(
/*
* Pause background reclaim while we're scrubbing to reduce the
* likelihood of background perturbations to the counters throwing off
- * our calculations.
+ * our calculations. If a previous check failed and userspace told us
+ * to freeze the fs, do that instead.
*/
- xchk_stop_reaping(sc);
+ if (sc->flags & XCHK_TRY_HARDER) {
+ error = xchk_fs_freeze(sc);
+ if (error)
+ return error;
+ } else {
+ xchk_stop_reaping(sc);
+ }
return xchk_trans_alloc(sc, 0);
}
@@ -234,8 +241,7 @@ retry:
if (fsc->ifree > fsc->icount) {
if (tries--)
goto retry;
- xchk_set_incomplete(sc);
- return 0;
+ return -EDEADLOCK;
}
return 0;
@@ -275,8 +281,12 @@ xchk_fscount_check_frextents(
goto out_unlock;
spin_lock(&mp->m_sb_lock);
- if (fsc->frextents != mp->m_sb.sb_frextents)
- xchk_set_corrupt(sc);
+ if (fsc->frextents != mp->m_sb.sb_frextents) {
+ if (sc->flags & XCHK_FS_FROZEN)
+ xchk_set_corrupt(sc);
+ else
+ error = -EDEADLOCK;
+ }
spin_unlock(&mp->m_sb_lock);
out_unlock:
@@ -329,16 +339,9 @@ xchk_fscount_within_range(
* If the difference between the two summations is too large, the fs
* might just be busy and so we'll mark the scrub incomplete. Return
* true here so that we don't mark the counter corrupt.
- *
- * XXX: In the future when userspace can grant scrub permission to
- * quiesce the filesystem to solve the outsized variance problem, this
- * check should be moved up and the return code changed to signal to
- * userspace that we need quiesce permission.
*/
- if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
- xchk_set_incomplete(sc);
- return true;
- }
+ if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE)
+ return false;
return false;
}
@@ -351,6 +354,8 @@ xchk_fscounters(
struct xfs_mount *mp = sc->mp;
struct xchk_fscounters *fsc = sc->buf;
int64_t icount, ifree, fdblocks;
+ bool frozen = sc->flags & XCHK_FS_FROZEN;
+ bool try_again = false;
int error;
/* Snapshot the percpu counters. */
@@ -381,19 +386,38 @@ xchk_fscounters(
error = xchk_fscount_aggregate_agcounts(sc, fsc);
if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
return error;
- if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
- return 0;
- /* Compare the in-core counters with whatever we counted. */
- if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
- xchk_set_corrupt(sc);
+ /*
+ * Compare the in-core counters with whatever we counted. If the fs is
+ * frozen, we treat the discrepancy as a corruption because the freeze
+ * should have stabilized the counter values. Otherwise, we need
+ * userspace to call us back having granted us freeze permission.
+ */
+ if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
+ fsc->icount)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
- if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
- xchk_set_corrupt(sc);
+ if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
- fsc->fdblocks))
- xchk_set_corrupt(sc);
+ fsc->fdblocks)) {
+ if (frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
+
+ if (try_again)
+ return -EDEADLOCK;
/* Check the free extents counter for rt volumes. */
error = xchk_fscount_check_frextents(sc, fsc);