#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2014 SUSE Linux Products GmbH. All Rights Reserved.
#
# FSQA Test No. 038
#
# This test was motivated by btrfs issues, but it's generic enough as it
# doesn't use any btrfs specific features.
#
# Stress btrfs' block group allocation and deallocation while running fstrim in
# parallel. Part of the goal is also to get data block groups deallocated so
# that new metadata block groups, using the same physical device space ranges,
# get allocated while fstrim is running. This exposed several issues, ranging
# from invalid memory accesses and kernel crashes to metadata or data
# corruption, free space cache inconsistencies, free space leaks and memory
# leaks.
#
# These issues were fixed by the following btrfs linux kernel patches:
#
#   Btrfs: fix invalid block group rbtree access after bg is removed
#   Btrfs: fix crash caused by block group removal
#   Btrfs: fix freeing used extents after removing empty block group
#   Btrfs: fix race between fs trimming and block group remove/allocation
#   Btrfs: fix race between writing free space cache and trimming
#   Btrfs: make btrfs_abort_transaction consider existence of new block groups
#   Btrfs: fix memory leak after block remove + trimming
#   Btrfs: fix fs mapping extent map leak
#   Btrfs: fix unprotected deletion from pending_chunks list
#
# The issues were found on a qemu/kvm guest with 4 virtual CPUs, 4GB of RAM and
# scsi-hd devices with discard support enabled (that means hole punching in the
# disk's image file is performed by the host).
#
. ./common/preamble
_begin_fstest auto stress trim prealloc

# Override the default cleanup function.
_cleanup()
{
	[ -n "${create_pids}" ] && kill ${create_pids[@]}
	[ -n "${fallocate_pids}" ] && kill ${fallocate_pids[@]}
	[ -n "${trim_pids}" ] && kill ${trim_pids[@]}
	wait
	rm -fr $tmp
}

# Import common functions.
. ./common/filter

# real QA test starts here
_supported_fs generic
_require_scratch
_require_xfs_io_command "falloc"

echo "Silence is golden"

# Keep allocating and deallocating 1G of data space with the goal of creating
# and deleting 1 block group constantly. The intention is to race with the
# fstrim loop below.
fallocate_loop()
{
	# Wait for the running subcommand to finish before exiting so that
	# the mountpoint is not busy when we try to unmount it
	trap "wait; exit" SIGTERM

	local name=$1
	while true; do
		$XFS_IO_PROG -f -c "falloc -k 0 1G" \
			$SCRATCH_MNT/$name &> /dev/null
		sleep 3
		$XFS_IO_PROG -c "truncate 0" \
			$SCRATCH_MNT/$name &> /dev/null
		sleep 3
	done
}

trim_loop()
{
	# Wait for the running subcommand to finish before exiting so that
	# the mountpoint is not busy when we try to unmount it
	trap "wait; exit" SIGTERM

	while true; do
		$FSTRIM_PROG $SCRATCH_MNT
	done
}

# Create a bunch of small files that get their single extent inlined in the
# btree, so that we consume a lot of metadata space and get a chance of a
# data block group getting deleted and reused for metadata later. Sometimes
# the creation of all these files succeeds, other times we get ENOSPC failures
# at some point - this depends on how fast the btrfs cleaner kthread is
# notified about empty block groups, how fast it deletes them and how fast
# the fallocate calls happen. So we don't really care if they all succeed or
# not, the goal is just to keep metadata space usage growing while data block
# groups are deleted.
#
# Creating 200,000 files sequentially is really slow, so speed it up a bit
# by doing it concurrently with 4 threads in 4 separate directories.
nr_files=$((50000 * LOAD_FACTOR))
create_files()
{
	local prefix=$1
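	# Cap each writer's runtime so the test finishes in a reasonable
	# time even on slow storage.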
	local now=$(date '+%s')
	local end_time=$(( now + (TIME_FACTOR * 30) ))

	for ((n = 0; n < 4; n++)); do
		mkdir $SCRATCH_MNT/$n
		(
		trap "wait; exit" SIGTERM

		for ((i = 1; i <= $nr_files; i++)); do
			$XFS_IO_PROG -f -c "pwrite -S 0xaa 0 3900" \
				$SCRATCH_MNT/$n/"${prefix}_$i" &> /dev/null
			if [ $? -ne 0 ]; then
				echo "Failed creating file $n/${prefix}_$i" >>$seqres.full
				break
			fi
			if [ "$(date '+%s')" -ge $end_time ]; then
				echo "runtime exceeded @ $i files" >> $seqres.full
				break
			fi
		done
		) &
		create_pids[$n]=$!
	done

	wait ${create_pids[@]}
	unset create_pids
}

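# Format and mount the scratch device, and make sure it is large enough and
# supports batched discard (fstrim), which the trim loops below rely on.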
_scratch_mkfs >>$seqres.full 2>&1
_scratch_mount
_require_fs_space $SCRATCH_MNT $((10 * 1024 * 1024))
_require_batched_discard $SCRATCH_MNT

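# Start several fstrim loops that keep trimming the whole filesystem while
# block groups are allocated and deallocated by the tasks started below.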
for ((i = 0; i < $((4 * $LOAD_FACTOR)); i++)); do
	trim_loop &
	trim_pids[$i]=$!
done

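# Start the fallocate loops that keep allocating and freeing 1G of data
# space, racing with the fstrim loops above.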
for ((i = 0; i < $((1 * $LOAD_FACTOR)); i++)); do
	fallocate_loop "falloc_file_$i" &
	fallocate_pids[$i]=$!
done

create_files "foobar"

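# File creation is done, so stop the background loops and wait for them,
# otherwise the scratch filesystem would still be busy when it is unmounted.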
kill ${fallocate_pids[@]}
kill ${trim_pids[@]}
wait
unset fallocate_pids
unset trim_pids

status=0
exit