xfs: enable CoW when rt extent size is larger than 1 block

Copy on write encounters a major plot twist when the file being CoW'd lives on the realtime volume and the realtime extent size is larger than a single filesystem block. XFS can only unmap and remap full rt extents, which means that allocations are always done in units of full rt extents, and a request to unmap less than one extent is treated as a request to convert an extent to unwritten status. This behavioral quirk is not compatible with the existing CoW mechanism, so we have to intercept every path through which files can be modified to ensure that we dirty an entire rt extent at once so that we can remap a full rt extent. Use the existing VFS unshare functions to dirty the page cache to set that up.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
author: Darrick J. Wong <djwong@kernel.org> 2021-09-01 11:19:50 -0700
committer: Darrick J. Wong <djwong@kernel.org> 2021-12-15 17:29:26 -0800
commit: c9da9d633c0ccbd1933ae2f58aaa99e68a4e3e0d (patch)
tree: 8dafe8df1fd8fd91b370cbd3a0e81153947e4537 /fs/xfs/xfs_reflink.c
parent: 7ce91d85f50da47ca729f30c17d6db3883d71807 (diff)
1 files changed, 39 insertions, 1 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 598861944112..f38cff39acd7 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -266,9 +266,26 @@ xfs_reflink_convert_cow_locked(
 	struct xfs_iext_cursor	icur;
 	struct xfs_bmbt_irec	got;
 	struct xfs_btree_cur	*dummy_cur = NULL;
+	struct xfs_mount	*mp = ip->i_mount;
 	int			dummy_logflags;
 	int			error = 0;
 
+	/*
+	 * We can only remap full rt extents, so make sure that we convert the
+	 * entire extent.  The caller must ensure that this is either a direct
+	 * write that's aligned to the rt extent size, or a buffered write for
+	 * which we've dirtied extra pages to make this work properly.
+	 */
+	if (xfs_inode_needs_cow_around(ip)) {
+		xfs_fileoff_t	new_off;
+
+		new_off = rounddown_64(offset_fsb, mp->m_sb.sb_rextsize);
+		count_fsb += offset_fsb - new_off;
+		offset_fsb = new_off;
+
+		count_fsb = roundup_64(count_fsb, mp->m_sb.sb_rextsize);
+	}
+
 	if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got))
 		return 0;
 
@@ -475,11 +492,21 @@ xfs_reflink_cancel_cow_blocks(
 	bool				cancel_real)
 {
 	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+	struct xfs_mount		*mp = ip->i_mount;
 	struct xfs_bmbt_irec		got, del;
 	struct xfs_iext_cursor		icur;
 	bool				isrt = XFS_IS_REALTIME_INODE(ip);
 	int				error = 0;
 
+	/*
+	 * Shrink the range that we're cancelling if its endpoints don't align
+	 * to the realtime extent size, since we can only free full extents.
+	 */
+	if (xfs_inode_needs_cow_around(ip)) {
+		offset_fsb = roundup_64(offset_fsb, mp->m_sb.sb_rextsize);
+		end_fsb = rounddown_64(end_fsb, mp->m_sb.sb_rextsize);
+	}
+
 	if (!xfs_inode_has_cow_data(ip))
 		return 0;
 	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
@@ -779,6 +806,7 @@ xfs_reflink_end_cow(
 	xfs_off_t			offset,
 	xfs_off_t			count)
 {
+	struct xfs_mount		*mp = ip->i_mount;
 	xfs_fileoff_t			offset_fsb;
 	xfs_fileoff_t			end_fsb;
 	int				error = 0;
@@ -789,6 +817,16 @@ xfs_reflink_end_cow(
 	end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
 
 	/*
+	 * Make sure the end is aligned with a rt extent (if desired), since
+	 * the end of the range could be EOF.  The _convert_cow function should
+	 * have set us up to swap only full rt extents.
+	 */
+	if (xfs_inode_needs_cow_around(ip)) {
+		offset_fsb = rounddown_64(offset_fsb, mp->m_sb.sb_rextsize);
+		end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize);
+	}
+
+	/*
 	 * Walk forwards until we've remapped the I/O range.  The loop function
 	 * repeatedly cycles the ILOCK to allocate one transaction per remapped
 	 * extent.
@@ -1625,7 +1663,7 @@ xfs_reflink_unshare(
 
 	inode_dio_wait(inode);
 
-	error = iomap_file_unshare(inode, offset, len,
+	error = iomap_file_unshare(VFS_I(ip), offset, len,
 			&xfs_buffered_write_iomap_ops);
 	if (error)
 		goto out;
author	Darrick J. Wong <djwong@kernel.org>	2021-09-01 11:19:50 -0700
committer	Darrick J. Wong <djwong@kernel.org>	2021-12-15 17:29:26 -0800
commit	c9da9d633c0ccbd1933ae2f58aaa99e68a4e3e0d (patch)
tree	8dafe8df1fd8fd91b370cbd3a0e81153947e4537 /fs/xfs/xfs_reflink.c
parent	7ce91d85f50da47ca729f30c17d6db3883d71807 (diff)