#! /bin/bash
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright (c) 2022 Oracle. All Rights Reserved.
#
# FS QA Test No. 556
#
# Check xfs_scrub's media scan can actually return diagnostic information for
# media errors in file data extents.

. ./common/preamble
_begin_fstest auto quick scrub eio

# Override the default cleanup function.  Removes this test's temporary files
# and then tears down the dmerror setup.
_cleanup()
{
	cd /
	rm -f $tmp.*
	_dmerror_cleanup
}

# Import common functions.
. ./common/fuzzy
. ./common/filter
. ./common/dmerror

# real QA test starts here
_supported_fs xfs
_require_scratch
_require_scratch_xfs_crc
_require_scrub
_require_dm_target error

# Filter scrub error messages read from stdin.  After _filter_scratch, byte
# counts that depend on the fs block size are replaced with fixed tokens:
# an offset of two fs blocks becomes "2FSB" and a length of one fs block
# (plus the rest of that line) becomes "1FSB.".
# $fs_blksz is expanded when the function is called, so it must be set by
# then; it is assigned further down in this script.
filter_scrub_errors()
{
	_filter_scratch | sed \
		-e "s/offset $((fs_blksz * 2)) /offset 2FSB /g" \
		-e "s/length $fs_blksz.*/length 1FSB./g"
}

_scratch_mkfs >> $seqres.full
_dmerror_init
_dmerror_mount >> $seqres.full 2>&1

_require_scratch_xfs_scrub

# Write a file with 4 file blocks worth of data
victim=$SCRATCH_MNT/a
file_blksz=$(_get_file_block_size $SCRATCH_MNT)
$XFS_IO_PROG -f -c "pwrite -S 0x58 0 $((4 * file_blksz))" -c "fsync" $victim >> $seqres.full

# errordev stays unset for a file on the data device and is "RT" for a
# realtime file; it is passed on to the dmerror helpers later.
unset errordev
_xfs_is_realtime_file $victim && errordev="RT"

# Grab the bmap row for extent 0 of the victim file.
bmap_str="$($XFS_IO_PROG -c "bmap -elpv" $victim | grep "^[[:space:]]*0:")"
echo "$errordev:$bmap_str" >> $seqres.full

# Field 3 of that row is used as the physical range.  The extent length is
# taken from field 4 for a realtime file and field 6 otherwise.
phys="$(echo "$bmap_str" | $AWK_PROG '{print $3}')"
if [ "$errordev" = "RT" ]; then
	len="$(echo "$bmap_str" | $AWK_PROG '{print $4}')"
else
	len="$(echo "$bmap_str" | $AWK_PROG '{print $6}')"
fi
fs_blksz=$(_get_block_size $SCRATCH_MNT)
echo "file_blksz:$file_blksz:fs_blksz:$fs_blksz" >> $seqres.full
kernel_sectors_per_fs_block=$((fs_blksz / 512))

# An empty or non-numeric length means the bmap output could not be parsed.
# Catch that explicitly: "test -lt" would merely print "integer expression
# expected" and return 2, which skips the _fail below and lets the test run
# on with a bogus extent.
case "$len" in
''|*[!0-9]*)
	_fail "could not parse extent length from bmap output: $bmap_str"
	;;
esac

# Did we get at least 4 fs blocks worth of extent?
min_len_sectors=$(( 4 * kernel_sectors_per_fs_block ))
test "$len" -lt $min_len_sectors && \
	_fail "could not format a long enough extent on an empty fs??"
# Strip everything from the first ".." onwards so that only the starting
# sector of the physical range remains.
phys_start=$(sed -e 's/\.\..*//g' <<< "$phys")
echo "$errordev:$phys:$len:$fs_blksz:$phys_start" >> $seqres.full

# Record the victim file's contents before any error is injected.
echo "victim file:" >> $seqres.full
od -tx1 -Ad -c $victim >> $seqres.full

# Set the dmerror table so that all IO will pass through.
_dmerror_reset_table

printf '%s\n' "dmerror before:" "$DMERROR_TABLE" "$DMERROR_RTTABLE" \
	>> $seqres.full

# All sector numbers that we feed to the kernel must be in units of 512b, but
# they also must be aligned to the device's logical block size.
logical_block_size=$(_min_dio_alignment $SCRATCH_DEV)
kernel_sectors_per_device_lba=$((logical_block_size / 512))

# Mark as bad one of the device LBAs in the middle of the extent.  Target the
# second LBA of the third block of the four-block file extent that we allocated
# earlier, but without overflowing into the fourth file block.
bad_len=$kernel_sectors_per_device_lba
bad_sector=$(( phys_start + (2 * kernel_sectors_per_fs_block) ))
if (( kernel_sectors_per_fs_block > kernel_sectors_per_device_lba )); then
	# The fs block spans more than one LBA, so step to its second LBA.
	bad_sector=$(( bad_sector + kernel_sectors_per_device_lba ))
fi

# A non-zero remainder means the target is not LBA-aligned; the message goes
# to stdout.
if (( bad_sector % kernel_sectors_per_device_lba )); then
	echo "bad_sector $bad_sector not congruent with device logical block size $logical_block_size"
fi

_dmerror_mark_range_bad $bad_sector $bad_len $errordev

printf '%s\n' "dmerror after marking bad:" "$DMERROR_TABLE" "$DMERROR_RTTABLE" \
	>> $seqres.full

_dmerror_load_error_table

# See if the media scan picks it up.
# Run _scratch_scrub with the given options.  Its stdout is appended to
# $seqres.full; its stderr is captured in $tmp.error and then printed through
# filter_scrub_errors.
scrub_and_filter()
{
	_scratch_scrub "$@" >> $seqres.full 2> $tmp.error
	filter_scrub_errors < $tmp.error
}

# Once in single-threaded mode
echo "Scrub for injected media error (single threaded)"
scrub_and_filter -b -x

# Once in parallel mode
echo "Scrub for injected media error (multi threaded)"
scrub_and_filter -x

# Remount to flush the page cache and reread to see the IO error
_dmerror_unmount
_dmerror_mount
echo "victim file:" >> $seqres.full
od -tx1 -Ad -c $victim >> $seqres.full 2> $tmp.error
# Drop od's "read error: " prefix before printing its error output.
sed -e 's/read error: //g' < $tmp.error | _filter_scratch

# Scrub again to re-confirm the media error across a remount
echo "Scrub for injected media error (after remount)"
scrub_and_filter -x

# Now mark the bad range good so that a retest shows no media failure.
_dmerror_mark_range_good $bad_sector $bad_len $errordev
_dmerror_load_error_table

printf '%s\n' "dmerror after marking good:" "$DMERROR_TABLE" "$DMERROR_RTTABLE" \
	>> $seqres.full

# Scrub one last time to make sure the error's gone.
echo "Scrub after removing injected media error"
scrub_and_filter -x

# success, all done
status=0
exit