diff options
-rw-r--r-- | fs/xfs/xfs_reflink.c | 66 | ||||
-rw-r--r-- | fs/xfs/xfs_rtalloc.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 21 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 1 |
4 files changed, 81 insertions, 9 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index b93faa819894..0c73aa441c47 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1348,6 +1348,13 @@ xfs_reflink_remap_blocks(
 	len = min_t(xfs_filblks_t, XFS_B_TO_FSB(mp, remap_len),
 			XFS_MAX_FILEOFF);
 
+	/*
+	 * Make sure the end is aligned with an rt extent (if desired), since
+	 * the end of the range could be EOF.
+	 */
+	if (xfs_inode_has_bigrtextents(dest))
+		len = roundup_64(len, mp->m_sb.sb_rextsize);
+
 	trace_xfs_reflink_remap_blocks(src, srcoff, len, dest, destoff);
 
 	while (len > 0) {
@@ -1421,6 +1428,50 @@ xfs_reflink_zero_posteof(
 			&xfs_buffered_write_iomap_ops);
 }
 
+/* Adjust the length of the remap operation to end on an rt extent boundary. */
+STATIC int
+xfs_reflink_remap_adjust_rtlen(
+	struct xfs_inode	*src,
+	loff_t			pos_in,
+	struct xfs_inode	*dest,
+	loff_t			pos_out,
+	loff_t			*len,
+	unsigned int		remap_flags)
+{
+	struct xfs_mount	*mp = src->i_mount;
+	uint32_t		mod;
+
+	div_u64_rem(*len, XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize), &mod);
+
+	/*
+	 * We previously checked the rtextent alignment of both offsets, so we
+	 * now have to check the alignment of the length. The VFS remap prep
+	 * function can change the length on us, so we can only make length
+	 * adjustments after that. If the length is aligned to an rtextent,
+	 * we're trivially good to go.
+	 *
+	 * Otherwise, the length is not aligned to an rt extent. If the source
+	 * file's range ends at EOF, the VFS ensured that the dest file's range
+	 * also ends at EOF. The actual remap function will round the (byte)
+	 * length up to the nearest rtextent unit, so we're ok here too.
+	 */
+	if (mod == 0 || pos_in + *len == i_size_read(VFS_I(src)))
+		return 0;
+
+	/*
+	 * Otherwise, the only thing we can do is round the request length down
+	 * to an rt extent boundary. If the caller doesn't allow that, we are
+	 * finished.
+	 */
+	if (!(remap_flags & REMAP_FILE_CAN_SHORTEN))
+		return -EINVAL;
+
+	/* Trim the partial rt extent off the end of the request. */
+	(*len) -= mod;
+	trace_xfs_reflink_remap_adjust_rtlen(src, pos_in, *len, dest, pos_out);
+	return 0;
+}
+
 /*
  * Prepare two files for range cloning. Upon a successful return both inodes
  * will have the iolock and mmaplock held, the page cache of the out file will
@@ -1480,11 +1531,22 @@ xfs_reflink_remap_prep(
 	if (IS_DAX(inode_in) || IS_DAX(inode_out))
 		goto out_unlock;
 
-	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
-			len, remap_flags);
+	ASSERT(is_power_of_2(xfs_inode_alloc_unitsize(dest)));
+
+	ret = __generic_remap_file_range_prep(file_in, pos_in, file_out,
+			pos_out, len, remap_flags,
+			xfs_inode_alloc_unitsize(dest));
 	if (ret || *len == 0)
 		goto out_unlock;
 
+	/* Make sure the end is aligned with an rt extent. */
+	if (xfs_inode_has_bigrtextents(src)) {
+		ret = xfs_reflink_remap_adjust_rtlen(src, pos_in, dest,
+				pos_out, len, remap_flags);
+		if (ret || *len == 0)
+			goto out_unlock;
+	}
+
 	/* Attach dquots to dest inode before changing block map */
 	ret = xfs_qm_dqattach(dest);
 	if (ret)
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 15f5a405d7d4..0a1114df236f 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1039,7 +1039,7 @@ xfs_growfs_rt(
 	if (!xfs_has_metadir(mp) &&
 	    (xfs_has_rmapbt(mp) || xfs_has_reflink(mp)))
 		return -EOPNOTSUPP;
-	if (xfs_has_reflink(mp) && in->extsize != 1)
+	if (xfs_has_reflink(mp) && !is_power_of_2(in->extsize))
 		return -EOPNOTSUPP;
 
 	nrblocks = in->newblocks;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index a70862935988..1c924fe18f3e 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1682,13 +1682,23 @@ xfs_fs_fill_super(
 
 	if (xfs_has_reflink(mp)) {
 		/*
-		 * Reflink doesn't support rt extent sizes larger than a single
-		 * block because we would have to perform unshare-around for
-		 * rtext-unaligned write requests.
+		 * Reflink doesn't support pagecache pages that span multiple
+		 * realtime extents because iomap doesn't track subpage dirty
+		 * state. This means that we cannot dirty all the pages
+		 * backing an rt extent without dirtying the adjoining rt
+		 * extents. If those rt extents are shared and extend into
+		 * other pages, this leads to crazy write amplification. The
+		 * VFS remap_range checks assume power-of-two block sizes, so
+		 * we don't support that either.
+		 *
+		 * Hence we only support rt extent sizes that are an integer
+		 * power of two because we know those will align with the page
+		 * size.
 		 */
-		if (xfs_has_realtime(mp) && mp->m_sb.sb_rextsize != 1) {
+		if (xfs_has_realtime(mp) &&
+		    !is_power_of_2(mp->m_sb.sb_rextsize)) {
 			xfs_alert(mp,
-	"reflink not compatible with realtime extent size %u!",
+	"reflink not compatible with non-power-of-2 realtime extent size %u!",
 					mp->m_sb.sb_rextsize);
 			error = -EINVAL;
 			goto out_filestream_unmount;
@@ -1707,7 +1717,6 @@ xfs_fs_fill_super(
 		}
 	}
 
-
 	error = xfs_mountfs(mp);
 	if (error)
 		goto out_filestream_unmount;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e3d4111ac387..de536012a04d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3616,6 +3616,7 @@ TRACE_EVENT(xfs_reflink_remap_blocks,
 		  __entry->dest_lblk)
 );
 DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_range);
+DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_adjust_rtlen);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_set_inode_flag_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_update_inode_size_error);