diff options
Diffstat (limited to 'fs/xfs/xfs_aops.c')
-rw-r--r-- | fs/xfs/xfs_aops.c | 66 |
1 files changed, 52 insertions, 14 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 67877c36ed11..5077d52a775d 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -19,6 +19,7 @@ #include "xfs_reflink.h" #include "xfs_errortag.h" #include "xfs_error.h" +#include "xfs_icache.h" struct xfs_writepage_ctx { struct iomap_writepage_ctx ctx; @@ -114,7 +115,7 @@ xfs_end_ioend( */ error = blk_status_to_errno(ioend->io_bio.bi_status); if (unlikely(error)) { - if (ioend->io_flags & IOMAP_F_SHARED) { + if (ioend->io_flags & IOMAP_IOEND_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset, offset + size); @@ -125,9 +126,9 @@ xfs_end_ioend( /* * Success: commit the COW or unwritten blocks if needed. */ - if (ioend->io_flags & IOMAP_F_SHARED) + if (ioend->io_flags & IOMAP_IOEND_SHARED) error = xfs_reflink_end_cow(ip, offset, size); - else if (ioend->io_type == IOMAP_UNWRITTEN) + else if (ioend->io_flags & IOMAP_IOEND_UNWRITTEN) error = xfs_iomap_write_unwritten(ip, offset, size, false); if (!error && xfs_ioend_is_append(ioend)) @@ -395,10 +396,11 @@ allocate_blocks: } static int -xfs_prepare_ioend( - struct iomap_ioend *ioend, +xfs_submit_ioend( + struct iomap_writepage_ctx *wpc, int status) { + struct iomap_ioend *ioend = wpc->ioend; unsigned int nofs_flag; /* @@ -409,7 +411,7 @@ xfs_prepare_ioend( nofs_flag = memalloc_nofs_save(); /* Convert CoW extents to regular */ - if (!status && (ioend->io_flags & IOMAP_F_SHARED)) { + if (!status && (ioend->io_flags & IOMAP_IOEND_SHARED)) { status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), ioend->io_offset, ioend->io_size); } @@ -417,10 +419,14 @@ xfs_prepare_ioend( memalloc_nofs_restore(nofs_flag); /* send ioends that might require a transaction to the completion wq */ - if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN || - (ioend->io_flags & IOMAP_F_SHARED)) + if (xfs_ioend_is_append(ioend) || + (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED))) ioend->io_bio.bi_end_io = xfs_end_bio; - return status; + + if (status) + return status; + submit_bio(&ioend->io_bio); + return 0; } /* @@ -462,7 +468,7 @@ xfs_discard_folio( static const struct iomap_writeback_ops xfs_writeback_ops = { .map_blocks = xfs_map_blocks, - .prepare_ioend = xfs_prepare_ioend, + .submit_ioend = xfs_submit_ioend, .discard_folio = xfs_discard_folio, }; @@ -528,12 +534,44 @@ xfs_vm_readahead( } static int -xfs_iomap_swapfile_activate( +xfs_vm_swap_activate( struct swap_info_struct *sis, struct file *swap_file, sector_t *span) { - sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev; + struct xfs_inode *ip = XFS_I(file_inode(swap_file)); + + /* + * Swap file activation can race against concurrent shared extent + * removal in files that have been cloned. If this happens, + * iomap_swapfile_iter() can fail because it encountered a shared + * extent even though an operation is in progress to remove those + * shared extents. + * + * This race becomes problematic when we defer extent removal + * operations beyond the end of a syscall (i.e. use async background + * processing algorithms). Users think the extents are no longer + * shared, but iomap_swapfile_iter() still sees them as shared + * because the refcountbt entries for the extents being removed have + * not yet been updated. Hence the swapon call fails unexpectedly. + * + * The race condition is currently most obvious from the unlink() + * operation as extent removal is deferred until after the last + * reference to the inode goes away. We then process the extent + * removal asynchronously, hence triggers the "syscall completed but + * work not done" condition mentioned above. To close this race + * window, we need to flush any pending inodegc operations to ensure + * they have updated the refcountbt records before we try to map the + * swapfile. + */ + xfs_inodegc_flush(ip->i_mount); + + /* + * Direct the swap code to the correct block device when this file + * sits on the RT device. + */ + sis->bdev = xfs_inode_buftarg(ip)->bt_bdev; + return iomap_swapfile_activate(sis, swap_file, span, &xfs_read_iomap_ops); } @@ -549,11 +587,11 @@ const struct address_space_operations xfs_address_space_operations = { .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_folio = generic_error_remove_folio, - .swap_activate = xfs_iomap_swapfile_activate, + .swap_activate = xfs_vm_swap_activate, }; const struct address_space_operations xfs_dax_aops = { .writepages = xfs_dax_writepages, .dirty_folio = noop_dirty_folio, - .swap_activate = xfs_iomap_swapfile_activate, + .swap_activate = xfs_vm_swap_activate, }; |