summaryrefslogtreecommitdiff
path: root/tests/btrfs/011
blob: ff52ada94a17d032087cb9819e567bb36337217b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2013 STRATO.  All rights reserved.
#
# FSQA Test No. btrfs/011
#
# Test of the btrfs replace operation.
#
# The amount of tests done depends on the number of devices in the
# SCRATCH_DEV_POOL. For full test coverage, at least 5 devices should
# be available (e.g. 5 partitions).
#
# The source and target devices for the replace operation are
# arbitrarily chosen out of SCRATCH_DEV_POOL. Since the target device
# mustn't be smaller than the source device, the requirement for this
# test is that all devices have _exactly_ the same size. If this is
# not the case, this test is not run.
#
# To check the filesystems after replacing a device, a scrub run is
# performed, a btrfsck run, and finally the filesystem is remounted.
#
# Load the fstests preamble and register this test. The words after
# _begin_fstest are presumably the groups this test belongs to (confirm
# in common/preamble).
. ./common/preamble
_begin_fstest auto replace volume scrub

# PID of the background noise writer started by btrfs_replace_test();
# 0 means none is running, which is what _cleanup() checks for.
noise_pid=0

# Override the default cleanup function.
#
# Terminates the background noise writer if one is still running, reaps all
# background jobs, removes this test's temporary files and unmounts the
# scratch filesystem.
#
# Globals: noise_pid (read; unset, empty or 0 means nothing to kill)
#          tmp       (read; prefix of the temporary files to remove)
_cleanup()
{
	# An unset or empty noise_pid is treated as 0 so the numeric test never
	# prints an error. kill -0 only probes whether the process exists;
	# stderr of the real kill is discarded because the process may exit
	# between the probe and the signal.
	if [ "${noise_pid:-0}" -ne 0 ] && kill -0 "$noise_pid" 2> /dev/null; then
		kill -TERM "$noise_pid" 2> /dev/null
	fi
	wait
	# Quoted so a prefix containing whitespace still works; skipped when
	# tmp is empty so the glob can never turn into a bare ".*".
	[ -n "$tmp" ] && rm -f -- "$tmp".*
	# we need this umount and couldn't rely on _require_scratch to umount
	# it from next test, because we would replace SCRATCH_DEV, which is
	# needed by _require_scratch, and make it umounted.
	_scratch_unmount > /dev/null 2>&1
}

# Import common functions.
. ./common/filter

# real QA test starts here
# The _require_* helpers below come from the fstests framework; they
# presumably skip the test when a requirement is not met (confirm in
# common/rc).
_supported_fs btrfs
_require_scratch_nocheck
# The header above asks for at least 5 devices of exactly the same size.
_require_scratch_dev_pool 5
_require_scratch_dev_pool_equal_size
# 10 * 1024 * 1024 kB, i.e. 10G if the helper indeed takes kB as noted.
_require_scratch_size $((10 * 1024 * 1024)) #kB
_require_command "$WIPEFS_PROG" wipefs
# Presumably fills _btrfs_profile_configs, which the test matrix loop at
# the bottom of this file matches against -- confirm in common/btrfs.
_btrfs_get_profile_configs dup

# Start without leftover temporary files of this test.
rm -f $tmp.*

echo "*** test btrfs replace"

# In seconds
# Used as the delay before a running replace is cancelled (and before the
# mid-replace sync); fill_scratch() writes filler data for twice this long.
wait_time=1

# Populate the mounted scratch filesystem with test data.
#
# Arguments: $1 - how many copies of the 1M "t0" file to create
# Globals:   SCRATCH_MNT, XFS_IO_PROG, wait_time, tmp, seqres (all read)
# Outputs:   whatever the filler writer left in $tmp.filler_result; calls
#            _notrun when that output reports "No space left".
fill_scratch()
{
	local fssize=$1
	local filler_pid
	# Keep the loop counter local so a caller's "i" is not clobbered.
	local i

	# Fill inline extents.
	for ((i = 1; i <= 500; i++)); do
		_ddt of="$SCRATCH_MNT/s$i" bs=3800 count=1
	done > /dev/null 2>&1

	# Fill data extents.
	for ((i = 1; i <= 500; i++)); do
		_ddt of="$SCRATCH_MNT/l$i" bs=16385 count=1
	done > /dev/null 2>&1
	_ddt of="$SCRATCH_MNT/t0" bs=1M count=1 > /dev/null 2>&1
	for ((i = 1; i <= fssize; i++)); do
		cp "$SCRATCH_MNT/t0" "$SCRATCH_MNT/t$i" || _fail "cp failed"
	done > /dev/null 2>> "$seqres.full"

	# Ensure we have enough data so that dev-replace takes at least
	# 2 * $wait_time, which leaves time to cancel the running replace.
	# Some extra points:
	# - Use XFS_IO_PROG instead of dd
	#   fstests wraps dd, making it pretty hard to kill the real dd pid
	# - Use 64K block size with Direct IO
	#   64K is the same stripe size used in replace/scrub. Using Direct IO
	#   ensures the IO speed is near the device limit and comparable to
	#   the replace speed.
	$XFS_IO_PROG -f -d -c "pwrite -b 64k 0 1E" "$SCRATCH_MNT/t_filler" &>\
		"$tmp.filler_result" &
	filler_pid=$!
	sleep $((2 * wait_time))
	# The writer never finishes on its own (1E bytes requested), so it is
	# killed outright; both kill and wait are silenced on purpose.
	kill -KILL "$filler_pid" &> /dev/null
	wait "$filler_pid" &> /dev/null

	# If the system is too fast and the fs is too small, then skip the test
	if grep -q "No space left" "$tmp.filler_result"; then
		ls -alh "$SCRATCH_MNT" >> "$seqres.full"
		cat "$tmp.filler_result" >> "$seqres.full"
		_notrun "fs too small for this test"
	fi
	# Anything the writer printed ends up in the test output.
	cat "$tmp.filler_result"
	sync; sync
}

# Build a scratch filesystem with the given profile, fill it and run the
# replace test on it. For mirrored/parity profiles that are not cancelled,
# a second replace back to the original device is run with "-r".
#
#   $1 - mkfs options for the scratch pool
#   $2 - number of pool devices passed to _scratch_dev_pool_get
#   $3 - "cancel" to cancel the replace, anything else to let it finish
#   $4 - size argument for fill_scratch; any value other than 64 selects
#        the "thorough" variant of btrfs_replace_test
workout()
{
	local mkfs_options="$1"
	local num_devs4raid="$2"
	local with_cancel="$3"
	local fssize="$4"
	local source_dev
	local quick
	local reverse_pass="no"
	local rc

	# The replace source is the first device listed in the pool.
	source_dev=$(echo ${SCRATCH_DEV_POOL} | $AWK_PROG '{print $1}')

	if [[ $fssize != 64 ]]; then
		quick="thorough"
	else
		quick="quick"
	fi

	echo -e "\\n---------workout \"$1\" $2 $3 $4-----------" >> $seqres.full

	$WIPEFS_PROG -a $SCRATCH_DEV_POOL > /dev/null 2>&1
	_scratch_dev_pool_get $num_devs4raid
	_spare_dev_get

	if ! _scratch_pool_mkfs $mkfs_options >> $seqres.full 2>&1; then
		_fail "mkfs failed"
	fi

	_scratch_mount
	# 2 * 512 * 1024 = 1048576 (1G if the helper takes kB -- confirm)
	_require_fs_space $SCRATCH_MNT $((2 * 512 * 1024))

	fill_scratch $fssize
	_run_btrfs_util_prog filesystem show -m $SCRATCH_MNT

	echo -e "Replace from $source_dev to $SPARE_DEV\\n" >> $seqres.full
	btrfs_replace_test $source_dev $SPARE_DEV "" $with_cancel $quick

	_run_btrfs_util_prog filesystem show -m $SCRATCH_MNT

	# The -r pass only makes sense for configs with a mirror/parity
	# profile, and only when the first replace was not cancelled.
	case "$mkfs_options" in
	*raid1*|*raid5*|*raid6*|*raid10*)
		if [ "${with_cancel}Q" != "cancelQ" ]; then
			reverse_pass="yes"
		fi
		;;
	esac

	if [ "$reverse_pass" = "yes" ]; then
		# Due to above replace, now SPARE_DEV is part of the FS,
		# check that.
		if ! $BTRFS_UTIL_PROG filesystem show -m $SCRATCH_MNT |\
			grep -qs $SPARE_DEV$; then
			_fail "$SPARE_DEV is not part of SCRATCH_FS"
		fi

		btrfs_replace_test $SPARE_DEV $source_dev "-r" $with_cancel $quick
	fi

	_scratch_unmount > /dev/null 2>&1
	_scratch_dev_pool_put
	_spare_dev_put
	rc=$?
	# Without the -r pass the function always reports success; with it,
	# the status of the last teardown step is passed through.
	if [ "$reverse_pass" != "yes" ]; then
		rc=0
	fi
	return $rc
}

# Run one "btrfs replace" on the mounted scratch filesystem while background
# write traffic is running, then scrub, fsck and remount the filesystem.
#
#   $1 - device to be replaced
#   $2 - device that takes its place
#   $3 - extra options for "replace start" (workout passes "" or "-r")
#   $4 - "cancel": start the replace in the background and cancel it after
#        $wait_time seconds; anything else: run it to completion
#   $5 - "thorough": additionally force a sync in the middle of a
#        non-cancelled replace
#
# Sets the global noise_pid while the background traffic is running and
# resets it to 0 afterwards.
btrfs_replace_test()
{
	local source_dev="$1"
	local target_dev="$2"
	local replace_options="$3"
	local with_cancel="$4"
	local quick="$5"

	# generate some (slow) background traffic in parallel to the
	# replace operation. It is not a problem if cat fails early
	# with ENOSPC.
	cat /dev/urandom | od > $SCRATCH_MNT/noise 2>> $seqres.full &
	# Remembered globally so that _cleanup can kill it if the test aborts.
	noise_pid=$!

	if [ "${with_cancel}Q" = "cancelQ" ]; then
		# background the replace operation (no '-B' option given)
		_run_btrfs_util_prog replace start -f $replace_options $source_dev $target_dev $SCRATCH_MNT
		sleep $wait_time
		# NOTE(review): with "2>&1 >> file" stderr is duplicated onto
		# the current stdout *before* stdout is redirected, so error
		# messages of the cancel go to the test output and only its
		# regular output goes to $seqres.full -- confirm this is intended.
		$BTRFS_UTIL_PROG replace cancel $SCRATCH_MNT 2>&1 >> $seqres.full

		# 'replace status' waits for the replace operation to finish
		# before the status is printed
		$BTRFS_UTIL_PROG replace status $SCRATCH_MNT > $tmp.tmp 2>&1
		cat $tmp.tmp >> $seqres.full

		# If the replace is finished, we need to replace $source_dev
		# back with $target_dev, or later fsck will fail and abort
		# the test, reducing the coverage.
		# (This reverse replace is started without $replace_options.)
		if grep -q finished $tmp.tmp ; then
			$BTRFS_UTIL_PROG replace start -Bf $target_dev \
				$source_dev $SCRATCH_MNT > /dev/null
		fi

		# For above finished case, we still output the error message
		# but continue the test, or later profiles won't get tested
		# at all.
		grep -q canceled $tmp.tmp || echo "btrfs replace status (canceled) failed"
	else
		if [ "${quick}Q" = "thoroughQ" ]; then
			# The thorough test runs around 2 * $wait_time seconds.
			# This is a chance to force a sync in the middle of the
			# replace operation.
			(sleep $wait_time; sync) > /dev/null 2>&1 &
		fi
		# '-B' keeps the replace in the foreground until it is done.
		_run_btrfs_util_prog replace start -Bf $replace_options $source_dev $target_dev $SCRATCH_MNT

		$BTRFS_UTIL_PROG replace status $SCRATCH_MNT > $tmp.tmp 2>&1
		cat $tmp.tmp >> $seqres.full
		grep -q finished $tmp.tmp || _fail "btrfs replace status (finished) failed"
	fi

	# Stop the background traffic if it has not already exited on its own.
	if ps -p $noise_pid | grep -q $noise_pid; then
		kill -TERM $noise_pid 2> /dev/null
	fi
	noise_pid=0
	# Without arguments, wait blocks until all background jobs of this
	# shell (noise pipeline, optional sync subshell) have exited.
	wait

	# scrub tests on-disk data, that's the reason for the sync.
	# With the '-B' option (don't background), any type of error causes
	# exit values != 0, including detected correctable and uncorrectable
	# errors on the device.
	sync; sync
	_run_btrfs_util_prog scrub start -B $SCRATCH_MNT

	# Two tests are performed, the 1st is to btrfsck the filesystem,
	# and the 2nd test is to mount the filesystem.
	# Usually _check_btrfs_filesystem would perform the mount test,
	# but it gets confused by the mount output that shows SCRATCH_MNT
	# mounted but not being mounted to SCRATCH_DEV. This happens
	# because in /proc/mounts the 2nd device of the filesystem is
	# shown after the replace operation. Let's just do the mount
	# test manually after _check_btrfs_filesystem is finished.
	_scratch_unmount > /dev/null 2>&1
	if [ "${with_cancel}Q" != "cancelQ" ]; then
		# after the replace operation, use the target_dev for everything
		echo "_check_btrfs_filesystem $target_dev" >> $seqres.full
		_check_btrfs_filesystem $target_dev
		# Reuse the scratch mount options, with SCRATCH_DEV swapped
		# for the replace target.
		_mount -t $FSTYP `_scratch_mount_options | sed "s&${SCRATCH_DEV}&${target_dev}&"`
	else
		# A cancelled replace leaves the source device in place.
		_check_btrfs_filesystem $source_dev
		_scratch_mount
	fi
}

# Test matrix. Each entry has the form
#   "<mkfs options>:<number of devices> <cancel|no> <fssize>"
# The part before the ':' becomes workout's first argument, the part after
# it is word-split into the remaining three arguments.
for t in "-m single -d single:1 no 64" \
	"-m dup -d single:1 no 64" \
	"-m dup -d single:1 cancel 1024" \
	"-m raid0 -d raid0:2 no 64" \
	"-m raid1 -d raid1:2 no 2048" \
	"-m raid10 -d raid10:4 no 64" \
	"-m single -d single -M:1 no 64" \
	"-m dup -d dup -M:1 no 64" \
	"-m raid5 -d raid5:2 no 64" \
	"-m raid6 -d raid6:3 no 64"; do
	# ${t%:*} drops the shortest suffix starting at a ':', ${t#*:} drops
	# the shortest prefix ending at a ':'.
	mkfs_option=${t%:*}
	workout_option=${t#*:}
	# Run the entry only if its profile options (with " -M" removed)
	# occur in the space-joined _btrfs_profile_configs list, followed by
	# a space or the end of the string.
	if [[ "${_btrfs_profile_configs[@]}" =~ "${mkfs_option/ -M}"( |$) ]]; then
		# $workout_option is deliberately unquoted so that it splits
		# into separate arguments.
		workout "$mkfs_option" $workout_option
	fi
done

echo "*** done"
# NOTE(review): status is presumably read by the exit handling that the
# fstests preamble installs -- confirm in common/preamble.
status=0
exit