blob: 2ae96ee230ef781ca0a051b7b96310aa874707fd (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2021 Oracle, Inc. All Rights Reserved.
#
# FS QA Test No. 648
#
# Test nested log recovery with repeated (simulated) disk failures. We kick
# off fsstress on a loopback filesystem mounted on the scratch fs, then switch
# out the underlying scratch device with dm-error to see what happens when the
# disk goes down. Having taken down both fses in this manner, remount them and
# repeat. This test simulates VM hosts crashing to try to shake out CoW bugs
# in writeback on the host that cause VM guests to fail to recover.
#
. ./common/preamble
_begin_fstest shutdown auto log metadata eio recoveryloop
# Test teardown: stop the workload, drop the loop mount and its mountpoint,
# remove the temp files, then unmount and tear down the dm-error device.
_cleanup()
{
	cd /
	$KILLALL_PROG -9 fsstress > /dev/null 2>&1
	# snap_loop_fs runs as a background job of this shell and only returns
	# once the alive flag is gone.  Remove the flag before the bare 'wait'
	# below, otherwise cleanup could block forever when it is entered in
	# the middle of an iteration.
	if [ -n "$scratch_aliveflag" ]; then
		rm -f "$scratch_aliveflag"
	fi
	wait
	# loopmnt is only set once the test got far enough to create it.
	if [ -n "$loopmnt" ]; then
		$UMOUNT_PROG "$loopmnt" 2>/dev/null
		rm -r -f "$loopmnt"
	fi
	# Quote the prefix but leave the glob unquoted so it still expands.
	rm -f "$tmp".*
	_dmerror_unmount
	_dmerror_cleanup
}
# Import common functions.
. ./common/dmerror
. ./common/reflink
# Test requirements.  The reflink checks are needed because snap_loop_fs
# snapshots the image with _cp_reflink; dm-error, killall and loop device
# support are all used by the main loop below.
_supported_fs generic
_require_scratch_reflink
_require_cp_reflink
_require_dm_target error
_require_command "$KILLALL_PROG" "killall"
_require_loop
# Only output line of the test (presumably the whole golden output -- confirm
# against the .out file).
echo "Silence is golden."
# Format the scratch device; mkfs chatter is appended to $seqres.full.
_scratch_mkfs >> $seqres.full 2>&1
_require_metadata_journaling $SCRATCH_DEV
# Set up the dm-error device and mount the scratch fs through it
# (helper semantics live in common/dmerror).
_dmerror_init
_dmerror_mount
# Create a fs image file sized to 1/3 of the space currently available on the
# scratch fs.  truncate only sets the file size; _mkfs_dev then formats it.
scratch_freesp_bytes=$(_get_available_space $SCRATCH_MNT)
loopimg_bytes=$((scratch_freesp_bytes / 3))
loopimg=$SCRATCH_MNT/testfs
truncate -s $loopimg_bytes $loopimg
_mkfs_dev $loopimg
# Mountpoint for the loop-mounted image.
loopmnt=$tmp.mount
mkdir -p $loopmnt
# Flag files coordinating with snap_loop_fs: the snapshotter keeps running
# while scratch_aliveflag exists, and snap_aliveflag exists for as long as the
# snapshotter itself is running.
scratch_aliveflag=$tmp.runsnap
snap_aliveflag=$tmp.snapping
# Background snapshotter: while $scratch_aliveflag exists, replace
# $loopimg.a with a fresh reflink copy of $loopimg about once per second.
# $snap_aliveflag is created on entry and removed on exit so that the
# caller can tell when this function has finished.
snap_loop_fs() {
	local snapshot=$loopimg.a

	touch "$snap_aliveflag"
	until [ ! -e "$scratch_aliveflag" ]; do
		rm -f $snapshot
		_cp_reflink $loopimg $snapshot
		sleep 1
	done
	rm -f "$snap_aliveflag"
}
# fsstress invocation aimed at the loop-mounted image.  It is kept in an array
# so that it can be expanded later as "${fsstress[@]}".
fsstress=($FSSTRESS_PROG $FSSTRESS_AVOID -d "$loopmnt" -n 999999 -p "$((LOAD_FACTOR * 4))")

# One pass: start the snapshotter and fsstress, swap in the dm-error table,
# tear both filesystems down, then restore the working table and remount the
# scratch fs for the next pass.
while _soak_loop_running $((25 * TIME_FACTOR)); do
	# snap_loop_fs keeps running for as long as $scratch_aliveflag exists.
	touch $scratch_aliveflag
	snap_loop_fs >> $seqres.full 2>&1 &

	if ! _mount $loopimg $loopmnt -o loop; then
		rm -f $scratch_aliveflag
		# Name the dump after the soak iteration, like the failure
		# message does, so every iteration gets its own file.
		_metadump_dev $loopimg $seqres.loop.$SOAK_LOOPIDX.md
		_fail "iteration $SOAK_LOOPIDX loopimg mount failed"
		break
	fi

	# fsstress is backgrounded from inside a subshell, so it is not a job
	# of this shell.
	("${fsstress[@]}" >> $seqres.full &) > /dev/null 2>&1

	# purposely include 0 second sleeps to test shutdown immediately after
	# recovery
	sleep $((RANDOM % (3 * TIME_FACTOR) ))
	rm -f $scratch_aliveflag

	# This test aims to simulate sudden disk failure, which means that we
	# do not want to quiesce the filesystem or otherwise give it a chance
	# to flush its logs.  Therefore we want to call dmsetup with the
	# --nolockfs parameter; to make this happen we must call the load
	# error table helper *without* 'lockfs'.
	_dmerror_load_error_table

	# Keep killing until no fsstress process shows up in the process list.
	while ps -e | grep fsstress > /dev/null 2>&1; do
		$KILLALL_PROG -9 fsstress > /dev/null 2>&1
		wait > /dev/null 2>&1
	done

	# Give the snapshotter up to 10 seconds to notice the removed flag and
	# drop its own flag file.
	for ((j = 0; j < 10; j++)); do
		test -e "$snap_aliveflag" || break
		sleep 1
	done

	# Unmount everything, then mount again after loading the working table
	# to replay the log, so we have a consistent fs after the test.
	$UMOUNT_PROG $loopmnt

	# The dm-error mount must be gone before the working table is loaded,
	# otherwise the rest of the test cannot proceed.  Retry for up to 100
	# seconds before giving up.  is_unmounted stays non-zero until an
	# unmount attempt succeeds.
	is_unmounted=1
	for ((j = 0; j < 100; j++)); do
		sleep 1
		if _dmerror_unmount > $tmp.unmount.err 2>&1; then
			is_unmounted=0
			break
		fi
	done
	if [ $is_unmounted -ne 0 ]; then
		cat $tmp.unmount.err
		_fail "iteration $SOAK_LOOPIDX scratch unmount failed"
	fi

	_dmerror_load_working_table
	if ! _dmerror_mount; then
		# Per-iteration dump name, matching the failure message.
		_metadump_dev $DMERROR_DEV $seqres.scratch.$SOAK_LOOPIDX.md
		_fail "iteration $SOAK_LOOPIDX scratch mount failed"
	fi
done
# Final sanity pass over the image file: try one more loop mount, then run
# _check_scratch_fs on the image with the realtime and log device variables
# blanked for that single call.
# NOTE(review): '-o loop' is passed twice, and the failure branch dumps
# $DMERROR_DEV and reports a "scratch" mount failure although it was the loop
# image that failed to mount -- confirm whether that is intended.
if [ -f "$loopimg" ]; then
	if ! _mount -o loop -t $FSTYP $loopimg $loopmnt -o loop; then
		_metadump_dev $DMERROR_DEV $seqres.scratch.final.md
		echo "final scratch mount failed"
	else
		$UMOUNT_PROG $loopmnt &> /dev/null
	fi
	SCRATCH_RTDEV= SCRATCH_LOGDEV= _check_scratch_fs $loopimg
fi

# success, all done; let the test harness check the scratch fs
status=0
exit
|