summaryrefslogtreecommitdiff
path: root/tests/xfs/556
blob: 2f8cad1a2acb3543a26e8d78da1c3396c8dcdf1a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#! /bin/bash
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright (c) 2022 Oracle.  All Rights Reserved.
#
# FS QA Test No. 556
#
# Check xfs_scrub's media scan can actually return diagnostic information for
# media errors in file data extents.

# Source the harness preamble, then register this test with the harness.
# NOTE(review): the words passed to _begin_fstest (auto, quick, scrub, eio)
# look like test group names -- confirm against common/preamble.
. ./common/preamble
_begin_fstest auto quick scrub eio

# Override the default cleanup function.
#
# Changes directory to /, deletes this test's temporary files ($tmp.*) and
# then calls _dmerror_cleanup.  The return status is that of _dmerror_cleanup.
#
# $tmp is quoted so that a path containing whitespace still matches its files,
# and the removal is skipped when $tmp is empty, because after the cd the glob
# would otherwise be ".*" evaluated in the root directory.
_cleanup()
{
	cd /
	if [ -n "$tmp" ]; then
		rm -f -- "$tmp".*
	fi
	_dmerror_cleanup
}

# Import common functions.
. ./common/fuzzy
. ./common/filter
. ./common/dmerror

# real QA test starts here
# Prerequisites: an XFS filesystem, a scratch device, and the "error"
# device-mapper target, as named in the arguments below.
# NOTE(review): each _require_* helper presumably skips the test when its
# prerequisite is missing -- confirm in the common/ libraries.
_supported_fs xfs
_require_scratch
_require_scratch_xfs_crc
_require_scrub
_require_dm_target error

# Normalise scrub error text read from stdin so it does not depend on the
# filesystem block size.
#
# Input is passed through _filter_scratch and then sed, which rewrites
#   "offset <2 * fs_blksz> "          -> "offset 2FSB "
#   "length <fs_blksz>" + rest of line -> "length 1FSB."
#
# Globals: fs_blksz (read).  It is expanded when the function runs, so it must
# be set before the first call.
filter_scrub_errors() {
	local third_block_offset=$((fs_blksz * 2))
	local offset_rule="s/offset $third_block_offset /offset 2FSB /g"
	local length_rule="s/length $fs_blksz.*/length 1FSB./g"

	_filter_scratch | sed -e "$offset_rule" -e "$length_rule"
}

# Format the scratch filesystem, initialise the dmerror layer and mount.
# The mkfs output and the mount output (stdout and stderr) are appended to
# $seqres.full.
# NOTE(review): assumes _dmerror_mount mounts the dmerror device on
# $SCRATCH_MNT -- confirm in common/dmerror.
_scratch_mkfs >> $seqres.full
_dmerror_init
_dmerror_mount >> $seqres.full 2>&1

# This requirement is checked only after the mount above.
# NOTE(review): presumably because it needs a mounted filesystem -- confirm.
_require_scratch_xfs_scrub

# Write a file with 4 file blocks worth of data
victim=$SCRATCH_MNT/a
file_blksz=$(_get_file_block_size $SCRATCH_MNT)
$XFS_IO_PROG -f -c "pwrite -S 0x58 0 $((4 * file_blksz))" -c "fsync" $victim >> $seqres.full

# errordev stays unset unless _xfs_is_realtime_file succeeds for the victim,
# in which case it becomes "RT".  It is passed unquoted to the dmerror range
# helpers further down, so an unset value simply contributes no argument.
unset errordev
_xfs_is_realtime_file $victim && errordev="RT"

# Keep only the bmap line describing extent 0 of the victim file.
bmap_str="$($XFS_IO_PROG -c "bmap -elpv" $victim | grep "^[[:space:]]*0:")"
echo "$errordev:$bmap_str" >> $seqres.full

# Field 3 of that line is used as the physical range ("start..end"); the
# extent length is taken from field 4 for realtime files and field 6 otherwise.
# NOTE(review): presumably the realtime listing lacks the two AG columns --
# confirm against the xfs_io bmap documentation.
phys="$(echo "$bmap_str" | $AWK_PROG '{print $3}')"
if [ "$errordev" = "RT" ]; then
	len="$(echo "$bmap_str" | $AWK_PROG '{print $4}')"
else
	len="$(echo "$bmap_str" | $AWK_PROG '{print $6}')"
fi
fs_blksz=$(_get_block_size $SCRATCH_MNT)
echo "file_blksz:$file_blksz:fs_blksz:$fs_blksz" >> $seqres.full
kernel_sectors_per_fs_block=$((fs_blksz / 512))

# Everything before the first ".." of the physical range is the start sector.
phys_start=${phys%%..*}

echo "$errordev:$phys:$len:$fs_blksz:$phys_start" >> $seqres.full

# Stop here if the bmap line could not be parsed.  An empty or non-numeric
# value would otherwise make the length comparison error out (skipping the
# failure path) or be evaluated as 0 in the sector arithmetic below, so the
# wrong sector would be marked bad.
case "$phys_start" in
''|*[!0-9]*)
	_fail "could not parse physical start from bmap output: $bmap_str"
	;;
esac
case "$len" in
''|*[!0-9]*)
	_fail "could not parse extent length from bmap output: $bmap_str"
	;;
esac

# Did we get at least 4 fs blocks worth of extent?
min_len_sectors=$(( 4 * kernel_sectors_per_fs_block ))
if [ "$len" -lt "$min_len_sectors" ]; then
	_fail "could not format a long enough extent on an empty fs??"
fi

# Dump the file contents to the full log before any error is injected.
echo "victim file:" >> $seqres.full
od -tx1 -Ad -c $victim >> $seqres.full

# Begin with a dmerror table under which all IO passes through.
_dmerror_reset_table

# Log the table variables as they stand before any range is marked bad.
printf '%s\n' \
	"dmerror before:" \
	"$DMERROR_TABLE" \
	"$DMERROR_RTTABLE" \
	"<end table>" >> $seqres.full

# Sector numbers handed to the kernel are counted in 512-byte units, yet they
# also have to be aligned to the logical block size of the device.
# NOTE(review): the alignment is always read from $SCRATCH_DEV, even when
# errordev is "RT" and the range is marked on the realtime device -- confirm
# that this is intended.
logical_block_size=$(_min_dio_alignment $SCRATCH_DEV)
kernel_sectors_per_device_lba=$((logical_block_size / 512))

# Poison exactly one device LBA in the middle of the extent.  The target is in
# the third block of the four-block extent allocated earlier: its second LBA
# when a filesystem block holds more than one LBA, otherwise its first, so the
# bad range never spills into the fourth file block.
bad_len=$kernel_sectors_per_device_lba
bad_sector=$(( phys_start + 2 * kernel_sectors_per_fs_block ))
if (( kernel_sectors_per_fs_block > kernel_sectors_per_device_lba )); then
	(( bad_sector += kernel_sectors_per_device_lba ))
fi

# A misaligned sector is reported on stdout.
if (( bad_sector % kernel_sectors_per_device_lba )); then
	echo "bad_sector $bad_sector not congruent with device logical block size $logical_block_size"
fi
_dmerror_mark_range_bad $bad_sector $bad_len $errordev

# Log the table variables again now that the bad range has been marked.
printf '%s\n' \
	"dmerror after marking bad:" \
	"$DMERROR_TABLE" \
	"$DMERROR_RTTABLE" \
	"<end table>" >> $seqres.full

# Load the error table.
_dmerror_load_error_table

# See if the media scan picks it up.  For each pass, scrub's stdout goes to the
# full log while its stderr is captured in $tmp.error and then printed through
# filter_scrub_errors.

# Pass 1: single-threaded mode
echo "Scrub for injected media error (single threaded)"
_scratch_scrub -b -x >> $seqres.full 2> $tmp.error
filter_scrub_errors < $tmp.error

# Pass 2: parallel mode
echo "Scrub for injected media error (multi threaded)"
_scratch_scrub -x >> $seqres.full 2> $tmp.error
filter_scrub_errors < $tmp.error

# Cycle the mount so the page cache is flushed, then read the victim file
# again so that the IO error shows up.  od's stderr is printed with the
# "read error: " prefix stripped and the result passed through _filter_scratch.
_dmerror_unmount
_dmerror_mount
echo "victim file:" >> $seqres.full
od -tx1 -Ad -c $victim >> $seqres.full 2> $tmp.error
sed -e 's/read error: //g' < $tmp.error | _filter_scratch

# Scrub once more to re-confirm the media error across the remount.
echo "Scrub for injected media error (after remount)"
_scratch_scrub -x >> $seqres.full 2> $tmp.error
filter_scrub_errors < $tmp.error

# Mark the same range good again so that a retest shows no media failure.
_dmerror_mark_range_good $bad_sector $bad_len $errordev
_dmerror_load_error_table

# Log the table variables after the range has been marked good.
printf '%s\n' \
	"dmerror after marking good:" \
	"$DMERROR_TABLE" \
	"$DMERROR_RTTABLE" \
	"<end table>" >> $seqres.full

echo "Scrub after removing injected media error"

# Final scrub: the injected error should be gone.
_scratch_scrub -x >> $seqres.full 2> $tmp.error
filter_scrub_errors < $tmp.error

# success, all done
# NOTE(review): status is presumably read by the harness on exit -- confirm.
status=0
exit