#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2021 Oracle, Inc. All Rights Reserved.
#
# FS QA Test No. 648
#
# Test nested log recovery with repeated (simulated) disk failures. We kick
# off fsstress on a loopback filesystem mounted on the scratch fs, then switch
# out the underlying scratch device with dm-error to see what happens when the
# disk goes down. Having taken down both fses in this manner, remount them and
# repeat. This test simulates VM hosts crashing to try to shake out CoW bugs
# in writeback on the host that cause VM guests to fail to recover.
#
. ./common/preamble
_begin_fstest shutdown auto log metadata eio recoveryloop

# Tear everything down: kill any fsstress still running, reap our background
# jobs, unmount and remove the loop mount point (if one was set up), delete the
# temporary files and finally dismantle the dm-error device.
_cleanup()
{
	cd /
	$KILLALL_PROG -9 fsstress > /dev/null 2>&1
	wait
	if [ -n "$loopmnt" ]; then
		$UMOUNT_PROG "$loopmnt" 2>/dev/null
		rm -r -f "$loopmnt"
	fi
	rm -f "$tmp".*
	_dmerror_unmount
	_dmerror_cleanup
}

# Import common functions.
. ./common/dmerror
. ./common/reflink

# Modify as appropriate.
_supported_fs generic
_require_scratch_reflink
_require_cp_reflink
_require_dm_target error
_require_command "$KILLALL_PROG" "killall"
_require_loop

echo "Silence is golden."

_scratch_mkfs >> "$seqres.full" 2>&1
_require_metadata_journaling $SCRATCH_DEV
_dmerror_init
_dmerror_mount

# Create a fs image consuming 1/3 of the scratch fs
scratch_freesp_bytes=$(_get_available_space $SCRATCH_MNT)
loopimg_bytes=$((scratch_freesp_bytes / 3))

loopimg=$SCRATCH_MNT/testfs
truncate -s "$loopimg_bytes" "$loopimg"
_mkfs_dev "$loopimg"

loopmnt=$tmp.mount
mkdir -p "$loopmnt"

# Flag files used to coordinate with the background snapshot loop:
#  - $scratch_aliveflag exists while the main loop wants snapshots taken;
#  - $snap_aliveflag exists while snap_loop_fs is still running.
scratch_aliveflag=$tmp.runsnap
snap_aliveflag=$tmp.snapping

# Repeatedly reflink-copy the loop image to "$loopimg.a", once per second, for
# as long as $scratch_aliveflag exists. $snap_aliveflag is created on entry and
# removed on exit so that the caller can tell when this loop has finished.
snap_loop_fs()
{
	touch "$snap_aliveflag"
	while [ -e "$scratch_aliveflag" ]; do
		rm -f "$loopimg.a"
		_cp_reflink "$loopimg" "$loopimg.a"
		sleep 1
	done
	rm -f "$snap_aliveflag"
}

# $FSSTRESS_PROG and $FSSTRESS_AVOID are deliberately left unquoted so that any
# embedded arguments are split into separate words.
fsstress=($FSSTRESS_PROG $FSSTRESS_AVOID -d "$loopmnt" -n 999999 -p "$((LOAD_FACTOR * 4))")

while _soak_loop_running $((25 * TIME_FACTOR)); do
	touch "$scratch_aliveflag"
	snap_loop_fs >> "$seqres.full" 2>&1 &

	if ! _mount "$loopimg" "$loopmnt" -o loop; then
		rm -f "$scratch_aliveflag"
		# Name the dump after the soak iteration, matching the failure
		# message below, so that dumps from different iterations do not
		# overwrite each other.
		_metadump_dev "$loopimg" "$seqres.loop.$SOAK_LOOPIDX.md"
		_fail "iteration $SOAK_LOOPIDX loopimg mount failed"
		break
	fi

	# Start fsstress from a throwaway subshell so that it is not a job of
	# this shell.
	("${fsstress[@]}" >> "$seqres.full" &) > /dev/null 2>&1

	# purposely include 0 second sleeps to test shutdown immediately after
	# recovery
	sleep $((RANDOM % (3 * TIME_FACTOR) ))
	rm -f "$scratch_aliveflag"

	# This test aims to simulate sudden disk failure, which means that we
	# do not want to quiesce the filesystem or otherwise give it a chance
	# to flush its logs. Therefore we want to call dmsetup with the
	# --nolockfs parameter; to make this happen we must call the load
	# error table helper *without* 'lockfs'.
	_dmerror_load_error_table

	# Keep killing until no fsstress process shows up in the process list.
	while ps -e | grep fsstress > /dev/null 2>&1; do
		$KILLALL_PROG -9 fsstress > /dev/null 2>&1
		wait > /dev/null 2>&1
	done

	# Give the snapshot loop up to ten seconds to notice the removed flag
	# and exit.
	for ((j = 0; j < 10; j++)); do
		test -e "$snap_aliveflag" || break
		sleep 1
	done

	# Unmount both filesystems, then (below) load the working table and
	# mount the scratch fs again to replay its log, so that we have a
	# consistent fs after the test.
	$UMOUNT_PROG "$loopmnt"

	# The dm-error device must be unmounted here, otherwise the rest of
	# the test cannot continue. Retry for up to 100 seconds before giving
	# up.
	unmount_failed=1
	for ((j = 0; j < 100; j++)); do
		sleep 1
		if _dmerror_unmount > "$tmp.unmount.err" 2>&1; then
			unmount_failed=0
			break
		fi
	done
	if [ "$unmount_failed" -ne 0 ]; then
		# Show the output of the last failed unmount attempt.
		cat "$tmp.unmount.err"
		_fail "iteration $SOAK_LOOPIDX scratch unmount failed"
	fi

	_dmerror_load_working_table
	if ! _dmerror_mount; then
		_metadump_dev $DMERROR_DEV "$seqres.scratch.$SOAK_LOOPIDX.md"
		_fail "iteration $SOAK_LOOPIDX scratch mount failed"
	fi
done

# Make sure the fs image file is ok
if [ -f "$loopimg" ]; then
	if _mount -o loop -t $FSTYP "$loopimg" "$loopmnt"; then
		$UMOUNT_PROG "$loopmnt" &> /dev/null
	else
		# NOTE(review): the mount that failed here is the loop image,
		# yet the dump is taken from $DMERROR_DEV and the message says
		# "scratch" -- confirm whether that is intentional.
		_metadump_dev $DMERROR_DEV "$seqres.scratch.final.md"
		echo "final scratch mount failed"
	fi
	SCRATCH_RTDEV= SCRATCH_LOGDEV= _check_scratch_fs "$loopimg"
fi

# success, all done; let the test harness check the scratch fs
status=0
exit