summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-01-05 17:47:11 -0800
committerDarrick J. Wong <djwong@kernel.org>2021-03-25 17:08:53 -0700
commitfc1c5897bd8777e33c3fa7b215f61dec04850084 (patch)
treeadef6ea4e524a6d3f9525ca6f9583d1f4409ceab /fs
parentccc831c9449f69c5468f8abcde0d356a6bd81e9e (diff)
xfs: forcibly convert unwritten blocks within an rt extent before sharing
As noted in the previous patch, XFS can only unmap and map full rt extents. This means that we cannot stop mid-extent for any reason, including stepping around unwritten/written extents. Second, the reflink and CoW mechanisms were not designed to handle shared unwritten extents, so we have to do something to get rid of them. If the user asks us to remap two files, we must scan both ranges beforehand to convert any unwritten extents that are not aligned to rt extent boundaries into zeroed written extents before sharing. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/xfs_reflink.c126
1 file changed, 126 insertions, 0 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 51957f95be1b..4931b845f1ce 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1380,6 +1380,113 @@ xfs_reflink_zero_posteof(
}
/*
+ * Decide if this is an unwritten extent that isn't aligned to a rt extent
+ * boundary. If it is, shorten the mapping so that we're ready to convert
+ * everything up to the next rt extent to a zeroed written extent. If not,
+ * return false.
+ */
+static inline bool
+xfs_reflink_prep_conversion(
+ struct xfs_mount *mp,
+ struct xfs_bmbt_irec *irec)
+{
+ xfs_fileoff_t rext_next;
+ u32 modoff, modcnt;
+
+ if (irec->br_state != XFS_EXT_UNWRITTEN)
+ return false;
+
+ div_u64_rem(irec->br_startoff, mp->m_sb.sb_rextsize, &modoff);
+ div_u64_rem(irec->br_blockcount, mp->m_sb.sb_rextsize, &modcnt);
+ if (modoff == 0 && modcnt == 0)
+ return false;
+
+ rext_next = (irec->br_startoff - modoff) + mp->m_sb.sb_rextsize;
+ xfs_trim_extent(irec, irec->br_startoff, rext_next - irec->br_startoff);
+ return true;
+}
+
+/*
+ * Convert all unwritten extents to written so that we can share them. The
+ * reflink prep function already flushed all dirty pages to disk, so we can
+ * take care of this without going back to the VFS.
+ */
+static int
+xfs_reflink_convert_unwritten(
+ struct xfs_inode *src,
+ loff_t pos,
+ loff_t len)
+{
+ struct xfs_bmbt_irec irec;
+ struct xfs_trans *tp;
+ struct xfs_mount *mp = src->i_mount;
+ xfs_fileoff_t off = XFS_B_TO_FSBT(mp, pos);
+ xfs_fileoff_t endoff;
+ unsigned int resblks;
+ int ret;
+
+ off = rounddown_64(XFS_B_TO_FSBT(mp, pos), mp->m_sb.sb_rextsize);
+ endoff = roundup_64(XFS_B_TO_FSB(mp, pos + len), mp->m_sb.sb_rextsize);
+ while (off < endoff) {
+ int nmap = 1;
+
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 1);
+ ret = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
+ &tp);
+ if (ret)
+ return ret;
+
+ xfs_ilock(src, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, src, XFS_ILOCK_EXCL);
+
+ /*
+ * Read the mapping. If we find an unwritten extent that isn't
+ * aligned to an rt extent boundary...
+ */
+ ret = xfs_bmapi_read(src, off, endoff - off, &irec, &nmap, 0);
+ if (ret)
+ goto err;
+ ASSERT(nmap == 1);
+ ASSERT(irec.br_startoff == off);
+ if (!xfs_reflink_prep_conversion(mp, &irec)) {
+ xfs_trans_cancel(tp);
+ off += irec.br_blockcount;
+ continue;
+ }
+
+ /*
+ * ...make sure this partially unwritten rt extent gets
+ * converted to a zeroed written extent that we can remap.
+ */
+ nmap = 1;
+ ret = xfs_bmapi_write(tp, src, off, irec.br_blockcount,
+ XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &irec,
+ &nmap);
+ if (ret)
+ goto err;
+ ASSERT(nmap == 1);
+ if (irec.br_state != XFS_EXT_NORM) {
+ ASSERT(0);
+ ret = -EIO;
+ goto err;
+ }
+ ret = xfs_trans_commit(tp);
+ if (ret)
+ return ret;
+
+ off += irec.br_blockcount;
+ }
+
+ return 0;
+err:
+ xfs_trans_cancel(tp);
+ return ret;
+}
+
+/*
* Prepare two files for range cloning. Upon a successful return both inodes
* will have the iolock and mmaplock held, the page cache of the out file will
* be truncated, and any leases on the out file will have been broken. This
@@ -1462,6 +1569,25 @@ xfs_reflink_remap_prep(
goto out_unlock;
/*
+ * Now that we've marked both inodes for reflink, make sure that all
+ * possible rt extents in both files' ranges are either wholly written,
+ * wholly unwritten, or holes. The bmap code requires that we align
+ * all unmap and remap requests to a rt extent boundary. We've already
+ * flushed the page cache and finished directio, so we can convert the
+ * extents directly.
+ */
+ if (xfs_reflink_need_unshare_around(src)) {
+ ret = xfs_reflink_convert_unwritten(src, pos_in, *len);
+ if (ret)
+ return ret;
+ }
+ if (xfs_reflink_need_unshare_around(dest)) {
+ ret = xfs_reflink_convert_unwritten(dest, pos_out, *len);
+ if (ret)
+ return ret;
+ }
+
+ /*
* If pos_out > EOF, we may have dirtied blocks between EOF and
* pos_out. In that case, we need to extend the flush and unmap to cover
* from EOF to the end of the copy length.