summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_reflink.c
diff options
context:
space:
mode:
author Darrick J. Wong <djwong@kernel.org> 2021-09-01 11:19:50 -0700
committer Darrick J. Wong <djwong@kernel.org> 2021-12-15 17:29:26 -0800
commit c9da9d633c0ccbd1933ae2f58aaa99e68a4e3e0d (patch)
tree 8dafe8df1fd8fd91b370cbd3a0e81153947e4537 /fs/xfs/xfs_reflink.c
parent 7ce91d85f50da47ca729f30c17d6db3883d71807 (diff)
xfs: enable CoW when rt extent size is larger than 1 block
Copy on write encounters a major plot twist when the file being CoW'd lives on the realtime volume and the realtime extent size is larger than a single filesystem block. XFS can only unmap and remap full rt extents, which means that allocations are always done in units of full rt extents, and a request to unmap less than one extent is treated as a request to convert an extent to unwritten status.

This behavioral quirk is not compatible with the existing CoW mechanism, so we have to intercept every path through which files can be modified to ensure that we dirty an entire rt extent at once so that we can remap a full rt extent. Use the existing VFS unshare functions to dirty the page cache to set that up.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs/xfs/xfs_reflink.c')
-rw-r--r-- fs/xfs/xfs_reflink.c 40
1 files changed, 39 insertions, 1 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 598861944112..f38cff39acd7 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -266,9 +266,26 @@ xfs_reflink_convert_cow_locked(
struct xfs_iext_cursor icur;
struct xfs_bmbt_irec got;
struct xfs_btree_cur *dummy_cur = NULL;
+ struct xfs_mount *mp = ip->i_mount;
int dummy_logflags;
int error = 0;
+ /*
+ * We can only remap full rt extents, so make sure that we convert the
+ * entire extent. The caller must ensure that this is either a direct
+ * write that's aligned to the rt extent size, or a buffered write for
+ * which we've dirtied extra pages to make this work properly.
+ */
+ if (xfs_inode_needs_cow_around(ip)) {
+ xfs_fileoff_t new_off;
+
+ new_off = rounddown_64(offset_fsb, mp->m_sb.sb_rextsize);
+ count_fsb += offset_fsb - new_off;
+ offset_fsb = new_off;
+
+ count_fsb = roundup_64(count_fsb, mp->m_sb.sb_rextsize);
+ }
+
if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got))
return 0;
@@ -475,11 +492,21 @@ xfs_reflink_cancel_cow_blocks(
bool cancel_real)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ struct xfs_mount *mp = ip->i_mount;
struct xfs_bmbt_irec got, del;
struct xfs_iext_cursor icur;
bool isrt = XFS_IS_REALTIME_INODE(ip);
int error = 0;
+ /*
+ * Shrink the range that we're cancelling if it doesn't align to the
+ * realtime extent size, since we can only free full rt extents.
+ */
+ if (xfs_inode_needs_cow_around(ip)) {
+ offset_fsb = roundup_64(offset_fsb, mp->m_sb.sb_rextsize);
+ end_fsb = rounddown_64(end_fsb, mp->m_sb.sb_rextsize);
+ }
+
if (!xfs_inode_has_cow_data(ip))
return 0;
if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
@@ -779,6 +806,7 @@ xfs_reflink_end_cow(
xfs_off_t offset,
xfs_off_t count)
{
+ struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb;
xfs_fileoff_t end_fsb;
int error = 0;
@@ -789,6 +817,16 @@ xfs_reflink_end_cow(
end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
/*
+ * Make sure the end is aligned with a rt extent (if desired), since
+ * the end of the range could be EOF. The _convert_cow function should
+ * have set us up to swap only full rt extents.
+ */
+ if (xfs_inode_needs_cow_around(ip)) {
+ offset_fsb = rounddown_64(offset_fsb, mp->m_sb.sb_rextsize);
+ end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize);
+ }
+
+ /*
* Walk forwards until we've remapped the I/O range. The loop function
* repeatedly cycles the ILOCK to allocate one transaction per remapped
* extent.
@@ -1625,7 +1663,7 @@ xfs_reflink_unshare(
inode_dio_wait(inode);
- error = iomap_file_unshare(inode, offset, len,
+ error = iomap_file_unshare(VFS_I(ip), offset, len,
&xfs_buffered_write_iomap_ops);
if (error)
goto out;