From d61bec08b904cf171835db98168f82bc338e92e4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 19 May 2021 09:38:27 -0400 Subject: btrfs: mark ordered extent and inode with error if we fail to finish While doing error injection testing I saw that sometimes we'd get an abort that wouldn't stop the current transaction commit from completing. This abort was coming from finish ordered IO, but at this point in the transaction commit we should have gotten an error and stopped. It turns out the abort came from finish ordered io while trying to write out the free space cache. It occurred to me that any failure inside of finish_ordered_io isn't actually raised to the person doing the writing, so we could have any number of failures in this path and think the ordered extent completed successfully and the inode was fine. Fix this by marking the ordered extent with BTRFS_ORDERED_IOERR, and marking the mapping of the inode with mapping_set_error, so any callers that simply call fdatawait will also get the error. With this we're seeing the IO error on the free space inode when we fail to do the finish_ordered_io. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bb4ab408d670..e7de0c08b981 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3005,6 +3005,18 @@ out: if (ret || truncated) { u64 unwritten_start = start; + /* + * If we failed to finish this ordered extent for any reason we + * need to make sure BTRFS_ORDERED_IOERR is set on the ordered + * extent, and mark the inode with the error if it wasn't + * already set. Any error during writeback would have already + * set the mapping error, so we need to set it if we're the ones + * marking this ordered extent as failed. + */ + if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR, + &ordered_extent->flags)) + mapping_set_error(ordered_extent->inode->i_mapping, -EIO); + if (truncated) unwritten_start += logical_len; clear_extent_uptodate(io_tree, unwritten_start, end, NULL); -- cgit v1.2.3 From dc09ef3562726cd520c8338c1640872a60187af5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 19 May 2021 14:04:21 -0400 Subject: btrfs: abort in rename_exchange if we fail to insert the second ref Error injection stress uncovered a problem where we'd leave a dangling inode ref if we failed during a rename_exchange. This happens because we insert the inode ref for one side of the rename, and then for the other side. If this second inode ref insert fails we'll leave the first one dangling and leave a corrupt file system behind. Fix this by aborting if we did the insert for the first inode ref. CC: stable@vger.kernel.org # 4.9+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e7de0c08b981..f5d32d85247a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9101,6 +9101,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, int ret2; bool root_log_pinned = false; bool dest_log_pinned = false; + bool need_abort = false; /* we only allow rename subvolume link between subvolumes */ if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) @@ -9160,6 +9161,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, old_idx); if (ret) goto out_fail; + need_abort = true; } /* And now for the dest. */ @@ -9175,8 +9177,11 @@ static int btrfs_rename_exchange(struct inode *old_dir, new_ino, btrfs_ino(BTRFS_I(old_dir)), new_idx); - if (ret) + if (ret) { + if (need_abort) + btrfs_abort_transaction(trans, ret); goto out_fail; + } } /* Update inode version and ctime/mtime. */ -- cgit v1.2.3