summaryrefslogtreecommitdiff
path: root/tests/generic/648
blob: 2ae96ee230ef781ca0a051b7b96310aa874707fd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2021 Oracle, Inc.  All Rights Reserved.
#
# FS QA Test No. 648
#
# Test nested log recovery with repeated (simulated) disk failures.  We kick
# off fsstress on a loopback filesystem mounted on the scratch fs, then switch
# out the underlying scratch device with dm-error to see what happens when the
# disk goes down.  Having taken down both fses in this manner, remount them and
# repeat.  This test simulates VM hosts crashing to try to shake out CoW bugs
# in writeback on the host that cause VM guests to fail to recover.
#
. ./common/preamble
_begin_fstest shutdown auto log metadata eio recoveryloop

# Cleanup hook: undo everything this test may have left behind.
# NOTE(review): nothing in this file calls _cleanup explicitly; presumably the
# test harness invokes it on exit -- confirm.
#
# Globals read: KILLALL_PROG, UMOUNT_PROG, loopmnt, tmp
# Every step is best-effort and tolerates the resource already being gone.
_cleanup()
{
	cd /
	# fsstress may still be hammering the loop mount; kill it, then wait
	# for this shell's own background jobs to finish.
	$KILLALL_PROG -9 fsstress > /dev/null 2>&1
	wait
	if [ -n "$loopmnt" ]; then
		# Paths are quoted so that a mount point containing whitespace
		# or glob characters is unmounted and removed as one argument
		# instead of being split into unrelated paths.
		$UMOUNT_PROG "$loopmnt" 2>/dev/null
		rm -r -f -- "$loopmnt"
	fi
	# Only the prefix is quoted; the trailing .* must still glob.
	rm -f -- "$tmp".*
	_dmerror_unmount
	_dmerror_cleanup
}

# Import common functions.
. ./common/dmerror
. ./common/reflink

# Prerequisites.  NOTE(review): the _supported_fs / _require_* helpers are
# defined in the sourced common/ libraries, not in this file; presumably each
# one skips the test when its requirement is not met -- confirm.
_supported_fs generic

_require_scratch_reflink
_require_cp_reflink
_require_dm_target error
_require_command "$KILLALL_PROG" "killall"
_require_loop

# The only line this test prints on success.
echo "Silence is golden."

# Format the scratch device, then stack dm-error on top of it and mount that,
# so the main loop can later swap in the error table to simulate the disk
# going away underneath both filesystems.
_scratch_mkfs >> $seqres.full 2>&1
_require_metadata_journaling $SCRATCH_DEV
_dmerror_init
_dmerror_mount

# Build the nested filesystem: an image file on the scratch fs, sized at one
# third of the space currently available there, then formatted.
loopimg=$SCRATCH_MNT/testfs
avail_bytes=$(_get_available_space $SCRATCH_MNT)
truncate -s $((avail_bytes / 3)) $loopimg
_mkfs_dev $loopimg

# Mount point for the nested filesystem.
loopmnt=$tmp.mount
mkdir -p $loopmnt

# Flag files shared with snap_loop_fs:
#   scratch_aliveflag - exists while the snapshot worker should keep running
#   snap_aliveflag    - exists while the snapshot worker is actually running
snap_aliveflag=$tmp.snapping
scratch_aliveflag=$tmp.runsnap

# Background worker: once a second, replace $loopimg.a with a fresh copy of
# the loop image made by _cp_reflink, for as long as $scratch_aliveflag
# exists.  $snap_aliveflag is created on entry and removed on exit, so its
# presence tells the caller that the worker has not finished yet.
snap_loop_fs() {
	local snapshot=$loopimg.a

	touch "$snap_aliveflag"
	until [ ! -e "$scratch_aliveflag" ]; do
		# Drop the previous snapshot before taking the next one.
		rm -f $snapshot
		_cp_reflink $loopimg $snapshot
		sleep 1
	done
	rm -f "$snap_aliveflag"
}

# fsstress command line, kept as an array so the quoted arguments survive
# expansion intact: operate on the nested fs with LOAD_FACTOR * 4 processes.
fsstress=($FSSTRESS_PROG $FSSTRESS_AVOID -d "$loopmnt" -n 999999 -p "$((LOAD_FACTOR * 4))")

# Soak loop.  Each iteration:
#   1. starts the snapshot worker and mounts the nested fs,
#   2. runs fsstress on it for a random (possibly zero) number of seconds,
#   3. switches the scratch device to the dm error table,
#   4. kills fsstress, waits for the snapshot worker, unmounts both fses,
#   5. restores the working table and remounts the scratch fs.
# Diagnostic dumps are named after $SOAK_LOOPIDX, the same iteration counter
# the failure messages report, so each iteration gets its own file.
while _soak_loop_running $((25 * TIME_FACTOR)); do
	touch $scratch_aliveflag
	snap_loop_fs >> $seqres.full 2>&1 &

	if ! _mount $loopimg $loopmnt -o loop; then
		# Tell the snapshot worker to stop before bailing out.
		rm -f $scratch_aliveflag
		_metadump_dev $loopimg $seqres.loop.$SOAK_LOOPIDX.md
		_fail "iteration $SOAK_LOOPIDX loopimg mount failed"
		break
	fi

	# Launched from a subshell so fsstress is not a job of this shell.
	("${fsstress[@]}" >> $seqres.full &) > /dev/null 2>&1

	# purposely include 0 second sleeps to test shutdown immediately after
	# recovery
	sleep $((RANDOM % (3 * TIME_FACTOR) ))
	rm -f $scratch_aliveflag

	# This test aims to simulate sudden disk failure, which means that we
	# do not want to quiesce the filesystem or otherwise give it a chance
	# to flush its logs.  Therefore we want to call dmsetup with the
	# --nolockfs parameter; to make this happen we must call the load
	# error table helper *without* 'lockfs'.
	_dmerror_load_error_table

	# Keep killing until ps no longer lists any fsstress process.
	while ps -e | grep fsstress > /dev/null 2>&1; do
		$KILLALL_PROG -9 fsstress > /dev/null 2>&1
		wait > /dev/null 2>&1
	done

	# Give the snapshot worker up to 10 seconds to notice that its run
	# flag is gone and exit.
	for ((j = 0; j < 10; j++)); do
		test -e "$snap_aliveflag" || break
		sleep 1
	done

	# Unmount the nested fs first, then the scratch fs underneath it.
	$UMOUNT_PROG $loopmnt

	# The dm-error mount must be gone before the working table can be
	# restored, otherwise all later testing is compromised.  Retry for up
	# to 100 seconds before giving up.
	unmount_failed=1
	for ((j = 0; j < 100; j++)); do
		sleep 1
		if _dmerror_unmount > $tmp.unmount.err 2>&1; then
			unmount_failed=0
			break
		fi
	done
	if [ $unmount_failed -ne 0 ]; then
		cat $tmp.unmount.err
		_fail "iteration $SOAK_LOOPIDX scratch unmount failed"
	fi

	# Load the working table and mount again so the log is replayed and
	# the scratch fs is consistent for the next iteration and after the
	# test.
	_dmerror_load_working_table
	if ! _dmerror_mount; then
		_metadump_dev $DMERROR_DEV $seqres.scratch.$SOAK_LOOPIDX.md
		_fail "iteration $SOAK_LOOPIDX scratch mount failed"
	fi
done

# Make sure the fs image file is ok: it has to mount once more (which gives
# it a chance to recover its log), and then it is handed to the fs checker.
if [ -f "$loopimg" ]; then
	if _mount -o loop -t $FSTYP $loopimg $loopmnt; then
		$UMOUNT_PROG $loopmnt &> /dev/null
	else
		# The mount that failed here is the nested image, not the
		# scratch fs, so dump and name the image -- the same way the
		# loopimg mount failure inside the soak loop is handled.
		_metadump_dev $loopimg $seqres.loop.final.md
		echo "final loopimg mount failed"
	fi
	# SCRATCH_RTDEV and SCRATCH_LOGDEV are blanked for this one call.
	# NOTE(review): presumably because the image file has no external
	# realtime/log device for the checker to look at -- confirm.
	SCRATCH_RTDEV= SCRATCH_LOGDEV= _check_scratch_fs $loopimg
fi

# success, all done; let the test harness check the scratch fs
status=0
exit