summaryrefslogtreecommitdiff
path: root/common/xfs
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2022-08-02 21:22:35 -0700
committerZorro Lang <zlang@kernel.org>2022-09-04 21:44:05 +0800
commit4f929ebc59285d47f8a79c3d3f1e5f4ce316d6d0 (patch)
tree7689a249249752c8cab879ca5eb4a5707e5fe7d4 /common/xfs
parentf3c314574d011e4b8d5c6f605e0ec6fed9f3804c (diff)
common: disable infinite IO error retry for EIO shutdown tests
This patch fixes a rather hard to hit livelock in the tests that test how xfs handles shutdown behavior when the device suddenly dies and starts returning EIO all the time. The livelock happens if the AIL is stuck retrying failed metadata updates forever, the log itself is not being written, and there is no more log grant space, which prevents the frontend from shutting down the log due to EIO errors during transactions. While most users probably want the default retry-forever behavior because EIO can be transient, the circumstances are different here. The tests are designed to flip the device back to working status only after the unmount succeeds, so we know there's no point in the filesystem retrying writes until after the unmount. This fixes some of the periodic hangs in generic/019 and generic/475. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Zorro Lang <zlang@redhat.com> Signed-off-by: Zorro Lang <zlang@kernel.org>
Diffstat (limited to 'common/xfs')
-rw-r--r--common/xfs29
1 files changed, 29 insertions, 0 deletions
diff --git a/common/xfs b/common/xfs
index 92c281c6..65234c8b 100644
--- a/common/xfs
+++ b/common/xfs
@@ -823,6 +823,35 @@ _scratch_xfs_unmount_dirty()
_scratch_unmount
}
+# Prepare a mounted filesystem for an IO error shutdown test by disabling retry
+# for metadata writes. This prevents a (rare) log livelock when:
+#
+# - The log has given out all available grant space, preventing any new
+# writers from tripping over IO errors (and shutting down the fs/log),
+# - All log buffers were written to disk, and
+# - The log tail is pinned because the AIL keeps hitting EIO trying to write
+# committed changes back into the filesystem.
+#
+# Real users might want the default behavior of the AIL retrying writes forever
+# but for testing purposes we don't want to wait.
+#
+# The sole parameter should be the filesystem data device, e.g. $SCRATCH_DEV.
+_xfs_prepare_for_eio_shutdown()
+{
+ local dev="$1"
+ local ctlfile="error/fail_at_unmount"
+ local ctl	# loop variable; keep it out of the sourcing test's namespace
+
+ # Don't retry any writes during the (presumably) post-shutdown unmount
+ _has_fs_sysfs "$ctlfile" && _set_fs_sysfs_attr "$dev" "$ctlfile" 1
+
+ # Disable retry of metadata writes that fail with EIO (skip absent knobs)
+ for ctl in max_retries retry_timeout_seconds; do
+ ctlfile="error/metadata/EIO/$ctl"
+ _has_fs_sysfs "$ctlfile" && _set_fs_sysfs_attr "$dev" "$ctlfile" 0
+ done
+}
+
# Skip if we are running an older binary without the stricter input checks.
# Make multiple checks to be sure that there is no regression on the one
# selected feature check, which would skew the result.