summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Darrick J. Wong <djwong@kernel.org> 2021-09-01 10:54:13 -0700
committer Darrick J. Wong <djwong@kernel.org> 2021-10-22 16:40:43 -0700
commit 796310607977817eaa59ba309f67e2fe01d78180 (patch)
tree a646b91007d7df779e1d825d32775ab024bda337
parent 885e9a90393720dc9d061d544d69143c5aff1ef4 (diff)
xfs: add a ->xchg_file_range handler
Add a function to handle file range exchange requests from the VFS.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- fs/xfs/xfs_bmap_util.c 1
-rw-r--r-- fs/xfs/xfs_file.c 57
-rw-r--r-- fs/xfs/xfs_trace.c 1
-rw-r--r-- fs/xfs/xfs_trace.h 120
-rw-r--r-- fs/xfs/xfs_xchgrange.c 377
-rw-r--r-- fs/xfs/xfs_xchgrange.h 19
6 files changed, 575 insertions, 0 deletions
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index d261518c4f05..60eadc1a5b72 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -28,6 +28,7 @@
#include "xfs_icache.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"
+#include "xfs_swapext.h"
/* Kernel only BMAP related definitions and functions */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 60d95a6c3631..24a232f9c3ed 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -24,6 +24,7 @@
#include "xfs_pnfs.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"
+#include "xfs_xchgrange.h"
#include <linux/falloc.h>
#include <linux/backing-dev.h>
@@ -1185,6 +1186,61 @@ out_unlock:
}
+/*
+ * ->xchg_file_range handler: exchange file contents between @file1 and
+ * @file2 as described by @fxr.
+ *
+ * Returns -EIO if the filesystem is shut down, otherwise 0 or the error of
+ * the first step that failed.
+ */
STATIC int
+xfs_file_xchg_range(
+	struct file		*file1,
+	struct file		*file2,
+	struct file_xchg_range	*fxr)
+{
+	struct inode		*inode1 = file_inode(file1);
+	struct inode		*inode2 = file_inode(file2);
+	struct xfs_inode	*ip1 = XFS_I(inode1);
+	struct xfs_inode	*ip2 = XFS_I(inode2);
+	struct xfs_mount	*mp = ip1->i_mount;
+	unsigned int		xchg_flags = 0;
+	bool			log_assist = false;
+	bool			want_atomic;
+	int			error;
+
+	if (xfs_is_shutdown(mp))
+		return -EIO;
+
+	/* Only touch ctime/mtime of files that have not opted out of it. */
+	if (likely(!(file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)))
+		xchg_flags |= XFS_XCHG_RANGE_UPD_CMTIME1;
+	if (likely(!(file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2)))
+		xchg_flags |= XFS_XCHG_RANGE_UPD_CMTIME2;
+
+	/*
+	 * Ask for log-assisted swapping; it is mandatory unless the caller
+	 * passed FILE_XCHG_RANGE_NONATOMIC.
+	 */
+	want_atomic = !(fxr->flags & FILE_XCHG_RANGE_NONATOMIC);
+	error = xfs_xchg_range_grab_log_assist(mp, want_atomic, &log_assist);
+	if (error)
+		return error;
+
+	/* Shut out IO to both files for the duration of the exchange. */
+	error = xfs_ilock2_io_mmap(ip1, ip2);
+	if (error)
+		goto out_drop_feat;
+
+	/* Prepare first; only exchange if preparation succeeded. */
+	error = xfs_xchg_range_prep(file1, file2, fxr);
+	if (!error)
+		error = xfs_xchg_range(ip1, ip2, fxr, xchg_flags);
+
+	xfs_iunlock2_io_mmap(ip1, ip2);
+out_drop_feat:
+	if (log_assist)
+		xfs_xchg_range_rele_log_assist(mp);
+	if (error)
+		trace_xfs_file_xchg_range_error(ip2, error, _RET_IP_);
+	return error;
+}
+
+STATIC int
xfs_file_open(
struct inode *inode,
struct file *file)
@@ -1452,6 +1508,7 @@ const struct file_operations xfs_file_operations = {
.fallocate = xfs_file_fallocate,
.fadvise = xfs_file_fadvise,
.remap_file_range = xfs_file_remap_range,
+ .xchg_file_range = xfs_file_xchg_range,
};
const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 53f95fdf4229..d1ea002903fa 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -36,6 +36,7 @@
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_swapext.h"
+#include "xfs_xchgrange.h"
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 60789dcaf79f..c66bf262ccbd 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3525,10 +3525,130 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow);
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
+
+/*
+ * swapext tracepoints
+ *
+ * xfs_file_xchg_range_error is fired from the error exit path of
+ * xfs_file_xchg_range().  FIEXCHANGE_FLAGS_STRS pairs each
+ * FILE_XCHG_RANGE_* flag with a short name so that __print_flags() can
+ * decode the vfs_flags field of the exchange-range tracepoints.
+ */
+DEFINE_INODE_ERROR_EVENT(xfs_file_xchg_range_error);
DEFINE_INODE_IREC_EVENT(xfs_swapext_extent1);
DEFINE_INODE_IREC_EVENT(xfs_swapext_extent2);
DEFINE_ITRUNC_EVENT(xfs_swapext_update_inode_size);
+#define FIEXCHANGE_FLAGS_STRS \
+ { FILE_XCHG_RANGE_NONATOMIC, "NONATOMIC" }, \
+ { FILE_XCHG_RANGE_FILE2_FRESH, "F2_FRESH" }, \
+ { FILE_XCHG_RANGE_FULL_FILES, "FULL" }, \
+ { FILE_XCHG_RANGE_TO_EOF, "TO_EOF" }, \
+ { FILE_XCHG_RANGE_FSYNC , "FSYNC" }, \
+ { FILE_XCHG_RANGE_DRY_RUN, "DRY_RUN" }, \
+ { FILE_XCHG_RANGE_SKIP_FILE1_HOLES, "SKIP_F1_HOLES" }
+
+/*
+ * file exchange-range tracepoint class
+ *
+ * Records the device, and for each of the two inodes its inode number,
+ * VFS i_size and i_disk_size, together with the file offsets, length and
+ * flags taken from the file_xchg_range request and the caller-supplied
+ * xchg_flags.  The xfs_xchg_range_prep, xfs_xchg_range_flush and
+ * xfs_xchg_range events defined below are instances of this class.
+ */
+DECLARE_EVENT_CLASS(xfs_xchg_range_class,
+ TP_PROTO(struct xfs_inode *ip1, const struct file_xchg_range *fxr,
+ struct xfs_inode *ip2, unsigned int xchg_flags),
+ TP_ARGS(ip1, fxr, ip2, xchg_flags),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ip1_ino)
+ __field(loff_t, ip1_isize)
+ __field(loff_t, ip1_disize)
+ __field(xfs_ino_t, ip2_ino)
+ __field(loff_t, ip2_isize)
+ __field(loff_t, ip2_disize)
+
+ __field(loff_t, file1_offset)
+ __field(loff_t, file2_offset)
+ __field(unsigned long long, length)
+ __field(unsigned long long, vflags)
+ __field(unsigned int, xflags)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip1)->i_sb->s_dev;
+ __entry->ip1_ino = ip1->i_ino;
+ __entry->ip1_isize = VFS_I(ip1)->i_size;
+ __entry->ip1_disize = ip1->i_disk_size;
+ __entry->ip2_ino = ip2->i_ino;
+ __entry->ip2_isize = VFS_I(ip2)->i_size;
+ __entry->ip2_disize = ip2->i_disk_size;
+
+ __entry->file1_offset = fxr->file1_offset;
+ __entry->file2_offset = fxr->file2_offset;
+ __entry->length = fxr->length;
+ __entry->vflags = fxr->flags;
+ __entry->xflags = xchg_flags;
+ ),
+ TP_printk("dev %d:%d vfs_flags %s xchg_flags %s bytecount 0x%llx "
+ "ino1 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx -> "
+ "ino2 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_flags(__entry->vflags, "|", FIEXCHANGE_FLAGS_STRS),
+ __print_flags(__entry->xflags, "|", XCHG_RANGE_FLAGS_STRS),
+ __entry->length,
+ __entry->ip1_ino,
+ __entry->ip1_isize,
+ __entry->ip1_disize,
+ __entry->file1_offset,
+ __entry->ip2_ino,
+ __entry->ip2_isize,
+ __entry->ip2_disize,
+ __entry->file2_offset)
+)
+
+#define DEFINE_XCHG_RANGE_EVENT(name) \
+DEFINE_EVENT(xfs_xchg_range_class, name, \
+ TP_PROTO(struct xfs_inode *ip1, const struct file_xchg_range *fxr, \
+ struct xfs_inode *ip2, unsigned int xchg_flags), \
+ TP_ARGS(ip1, fxr, ip2, xchg_flags))
+DEFINE_XCHG_RANGE_EVENT(xfs_xchg_range_prep);
+DEFINE_XCHG_RANGE_EVENT(xfs_xchg_range_flush);
+DEFINE_XCHG_RANGE_EVENT(xfs_xchg_range);
+/*
+ * Trace the inputs of a file2 freshness check: the current mtime and ctime
+ * (seconds and nanoseconds) of ip2, alongside the file2_ino, file2_mtime
+ * and file2_ctime values carried in the file_xchg_range request.
+ */
+TRACE_EVENT(xfs_xchg_range_freshness,
+ TP_PROTO(struct xfs_inode *ip2, const struct file_xchg_range *fxr),
+ TP_ARGS(ip2, fxr),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ip2_ino)
+ __field(long long, ip2_mtime)
+ __field(long long, ip2_ctime)
+ __field(int, ip2_mtime_nsec)
+ __field(int, ip2_ctime_nsec)
+
+ __field(xfs_ino_t, file2_ino)
+ __field(long long, file2_mtime)
+ __field(long long, file2_ctime)
+ __field(int, file2_mtime_nsec)
+ __field(int, file2_ctime_nsec)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip2)->i_sb->s_dev;
+ __entry->ip2_ino = ip2->i_ino;
+ __entry->ip2_mtime = VFS_I(ip2)->i_mtime.tv_sec;
+ __entry->ip2_ctime = VFS_I(ip2)->i_ctime.tv_sec;
+ __entry->ip2_mtime_nsec = VFS_I(ip2)->i_mtime.tv_nsec;
+ __entry->ip2_ctime_nsec = VFS_I(ip2)->i_ctime.tv_nsec;
+
+ __entry->file2_ino = fxr->file2_ino;
+ __entry->file2_mtime = fxr->file2_mtime;
+ __entry->file2_ctime = fxr->file2_ctime;
+ __entry->file2_mtime_nsec = fxr->file2_mtime_nsec;
+ __entry->file2_ctime_nsec = fxr->file2_ctime_nsec;
+ ),
+ TP_printk("dev %d:%d "
+ "ino 0x%llx mtime %lld:%d ctime %lld:%d -> "
+ "file 0x%llx mtime %lld:%d ctime %lld:%d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ip2_ino,
+ __entry->ip2_mtime,
+ __entry->ip2_mtime_nsec,
+ __entry->ip2_ctime,
+ __entry->ip2_ctime_nsec,
+ __entry->file2_ino,
+ __entry->file2_mtime,
+ __entry->file2_mtime_nsec,
+ __entry->file2_ctime,
+ __entry->file2_ctime_nsec)
+);
+
/* fsmap traces */
DECLARE_EVENT_CLASS(xfs_fsmap_class,
TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno,
diff --git a/fs/xfs/xfs_xchgrange.c b/fs/xfs/xfs_xchgrange.c
index 5e7098d5838e..993e6c5ed11c 100644
--- a/fs/xfs/xfs_xchgrange.c
+++ b/fs/xfs/xfs_xchgrange.c
@@ -13,8 +13,15 @@
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
+#include "xfs_quota.h"
+#include "xfs_bmap_util.h"
+#include "xfs_reflink.h"
+#include "xfs_trace.h"
#include "xfs_swapext.h"
#include "xfs_xchgrange.h"
+#include "xfs_sb.h"
+#include "xfs_icache.h"
+#include "xfs_log.h"
/* Lock (and optionally join) two inodes for a file range exchange. */
void
@@ -64,3 +71,373 @@ xfs_xchg_range_estimate(
xfs_xchg_range_iunlock(req->ip1, req->ip2);
return error;
}
+
+/*
+ * Get two files ready to have a range of their contents exchanged.
+ *
+ * Validates the request, attaches dquots to both inodes, flushes the
+ * affected ranges and cancels any CoW fork reservations covering them.
+ * Returns 0 (also when the prepared length is zero) or a negative errno.
+ */
+int
+xfs_xchg_range_prep(
+	struct file		*file1,
+	struct file		*file2,
+	struct file_xchg_range	*fxr)
+{
+	struct xfs_inode	*ip1 = XFS_I(file_inode(file1));
+	struct xfs_inode	*ip2 = XFS_I(file_inode(file2));
+	int			ret;
+
+	trace_xfs_xchg_range_prep(ip1, fxr, ip2, 0);
+
+	/* A realtime file cannot be exchanged with a non-realtime file. */
+	if (XFS_IS_REALTIME_INODE(ip1) != XFS_IS_REALTIME_INODE(ip2))
+		return -EINVAL;
+
+	/*
+	 * The VFS alignment checks only cope with power-of-2 allocation
+	 * units, which the realtime volume does not guarantee when an extent
+	 * size is set.  FULL_FILES requests skip the alignment checks, so
+	 * they are exempt.
+	 */
+	if (!(fxr->flags & FILE_XCHG_RANGE_FULL_FILES)) {
+		if (!is_power_of_2(xfs_inode_alloc_unitsize(ip2)))
+			return -EOPNOTSUPP;
+	}
+
+	ret = generic_xchg_file_range_prep(file1, file2, fxr,
+			xfs_inode_alloc_unitsize(ip2));
+	if (ret)
+		return ret;
+	if (fxr->length == 0)
+		return 0;
+
+	/* Both inodes need dquots attached before block maps change. */
+	ret = xfs_qm_dqattach(ip2);
+	if (!ret)
+		ret = xfs_qm_dqattach(ip1);
+	if (ret)
+		return ret;
+
+	trace_xfs_xchg_range_flush(ip1, fxr, ip2, 0);
+
+	/* Flush the affected range of file2, then of file1. */
+	ret = xfs_flush_unmap_range(ip2, fxr->file2_offset, fxr->length);
+	if (!ret)
+		ret = xfs_flush_unmap_range(ip1, fxr->file1_offset,
+				fxr->length);
+	if (ret)
+		return ret;
+
+	/*
+	 * Drop CoW fork preallocations over both ranges.  Dirty data was
+	 * flushed above, so whatever is left should only be speculative.
+	 */
+	if (xfs_inode_has_cow_data(ip1)) {
+		ret = xfs_reflink_cancel_cow_range(ip1, fxr->file1_offset,
+				fxr->length, true);
+		if (ret)
+			return ret;
+	}
+
+	if (!xfs_inode_has_cow_data(ip2))
+		return 0;
+
+	return xfs_reflink_cancel_cow_range(ip2, fxr->file2_offset,
+			fxr->length, true);
+}
+
+#define QRETRY_IP1	(0x1)
+#define QRETRY_IP2	(0x2)
+
+/*
+ * Obtain a quota reservation to make sure we don't hit EDQUOT.  We can skip
+ * this if quota enforcement is disabled or if both inodes' dquots are the
+ * same.
+ *
+ * @qretry is purely an output.  It is cleared on entry, so callers need not
+ * initialize it.  When a reservation fails with -EDQUOT or -ENOSPC, the
+ * QRETRY_IP1 and/or QRETRY_IP2 bits record which inode's reservation failed.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+STATIC int
+xfs_xchg_range_reserve_quota(
+	struct xfs_trans		*tp,
+	const struct xfs_swapext_req	*req,
+	const struct xfs_swapext_res	*res,
+	unsigned int			*qretry)
+{
+	int64_t				ddelta, rdelta;
+	int				ip1_error = 0;
+	int				error;
+
+	/*
+	 * Always define the output, even on the early return below, so that
+	 * the caller never sees a stale or uninitialized retry mask.
+	 */
+	*qretry = 0;
+
+	/*
+	 * Don't bother with a quota reservation if we're not enforcing them
+	 * or the two inodes have the same dquots.
+	 */
+	if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
+	    (req->ip1->i_udquot == req->ip2->i_udquot &&
+	     req->ip1->i_gdquot == req->ip2->i_gdquot &&
+	     req->ip1->i_pdquot == req->ip2->i_pdquot))
+		return 0;
+
+	/*
+	 * For each file, compute the net gain in the number of regular blocks
+	 * that will be mapped into that file and reserve that much quota.  The
+	 * quota counts must be able to absorb at least that much space.
+	 */
+	ddelta = res->ip2_bcount - res->ip1_bcount;
+	rdelta = res->ip2_rtbcount - res->ip1_rtbcount;
+	if (ddelta > 0 || rdelta > 0) {
+		error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
+				ddelta > 0 ? ddelta : 0,
+				rdelta > 0 ? rdelta : 0,
+				false);
+		if (error == -EDQUOT || error == -ENOSPC) {
+			/*
+			 * Save this error and see what happens if we try to
+			 * reserve quota for ip2.  Then report both.
+			 */
+			*qretry |= QRETRY_IP1;
+			ip1_error = error;
+			error = 0;
+		}
+		if (error)
+			return error;
+	}
+	if (ddelta < 0 || rdelta < 0) {
+		error = xfs_trans_reserve_quota_nblks(tp, req->ip2,
+				ddelta < 0 ? -ddelta : 0,
+				rdelta < 0 ? -rdelta : 0,
+				false);
+		if (error == -EDQUOT || error == -ENOSPC)
+			*qretry |= QRETRY_IP2;
+		if (error)
+			return error;
+	}
+	/* ip2 was fine (or not needed); now report the deferred ip1 failure. */
+	if (ip1_error)
+		return ip1_error;
+
+	/*
+	 * For each file, forcibly reserve the gross gain in mapped blocks so
+	 * that we don't trip over any quota block reservation assertions.
+	 * We must reserve the gross gain because the quota code subtracts from
+	 * bcount the number of blocks that we unmap; it does not add that
+	 * quantity back to the quota block reservation.
+	 */
+	error = xfs_trans_reserve_quota_nblks(tp, req->ip1, res->ip1_bcount,
+			res->ip1_rtbcount, true);
+	if (error)
+		return error;
+
+	return xfs_trans_reserve_quota_nblks(tp, req->ip2, res->ip2_bcount,
+			res->ip2_rtbcount, true);
+}
+
+/*
+ * Ask for permission to use log-assisted atomic exchange of file extents.
+ *
+ * On return *enabled says whether the caller holds that permission.  If it
+ * does, the caller must hand it back with xfs_xchg_range_rele_log_assist().
+ * With @force set, failing to obtain permission is an error; without it the
+ * function returns 0 with *enabled cleared.
+ *
+ * Callers must not be running any transactions or hold any inode locks.
+ */
+int
+xfs_xchg_range_grab_log_assist(
+	struct xfs_mount	*mp,
+	bool			force,
+	bool			*enabled)
+{
+	int			ret = 0;
+
+	/*
+	 * Pin the log incompat feature state so that an idle log cannot clear
+	 * the atomic swapext bit underneath us.
+	 */
+	xlog_use_incompat_feat(mp->m_log);
+	*enabled = true;
+
+	/* Feature already on: the reference we just took is all we need. */
+	if (xfs_has_atomicswap(mp))
+		return 0;
+
+	if (force) {
+		/*
+		 * The caller insists on log assistance, so try to turn the
+		 * feature on, provided the fs feature set is rich enough.
+		 */
+		if (xfs_can_atomicswap(mp))
+			ret = xfs_add_atomicswap(mp);
+		else
+			ret = -EOPNOTSUPP;
+
+		if (!ret) {
+			xfs_warn(mp,
+ "EXPERIMENTAL atomic file range swap feature added. Use at your own risk!");
+			return 0;
+		}
+	}
+
+	/*
+	 * Either log assistance was optional and is unavailable, or enabling
+	 * it failed.  Give the incompat reference back; the caller may not
+	 * use log-assisted swapping.
+	 */
+	xlog_drop_incompat_feat(mp->m_log);
+	*enabled = false;
+	return ret;
+}
+
+/*
+ * Release permission to use log-assisted extent swapping.
+ *
+ * Drops the log incompat feature reference on mp->m_log.  This is the
+ * counterpart of an xfs_xchg_range_grab_log_assist() call that returned
+ * with *enabled set to true.
+ */
+void
+xfs_xchg_range_rele_log_assist(
+ struct xfs_mount *mp)
+{
+ xlog_drop_incompat_feat(mp->m_log);
+}
+
+/*
+ * Exchange the contents of two files.
+ *
+ * Estimates the resources needed, allocates a transaction, locks and joins
+ * both inodes via xfs_xchg_range_ilock(), repeats the file2 freshness check,
+ * validates the extents, reserves quota (retrying once after calling
+ * xfs_blockgc_free_quota() if the reservation fails with EDQUOT or ENOSPC)
+ * and then swaps the mappings with xfs_swapext().  If
+ * FILE_XCHG_RANGE_DRY_RUN is set, the transaction is cancelled once all
+ * checks have passed and 0 is returned.
+ *
+ * @xchg_flags is a mask of XFS_XCHG_RANGE_UPD_CMTIME1/2 selecting which
+ * inodes get their mtime and ctime updated.
+ *
+ * Returns 0 or a negative errno.
+ */
+int
+xfs_xchg_range(
+ struct xfs_inode *ip1,
+ struct xfs_inode *ip2,
+ const struct file_xchg_range *fxr,
+ unsigned int xchg_flags)
+{
+ struct xfs_mount *mp = ip1->i_mount;
+ struct xfs_swapext_req req = {
+ .ip1 = ip1,
+ .ip2 = ip2,
+ .whichfork = XFS_DATA_FORK,
+ .startoff1 = XFS_B_TO_FSBT(mp, fxr->file1_offset),
+ .startoff2 = XFS_B_TO_FSBT(mp, fxr->file2_offset),
+ .blockcount = XFS_B_TO_FSB(mp, fxr->length),
+ };
+ struct xfs_swapext_res res;
+ struct xfs_trans *tp;
+ unsigned int qretry;
+ bool retried = false;
+ int error;
+
+ trace_xfs_xchg_range(ip1, fxr, ip2, xchg_flags);
+
+ /*
+ * This function only supports using log intent items (SXI items if
+ * atomic exchange is required, or BUI items if not) to exchange file
+ * data. The legacy whole-fork swap will be ported in a later patch.
+ */
+ if (!xfs_has_atomicswap(mp) && !xfs_can_atomicswap(mp))
+ return -EOPNOTSUPP;
+
+ if (fxr->flags & FILE_XCHG_RANGE_TO_EOF)
+ req.req_flags |= XFS_SWAP_REQ_SET_SIZES;
+ if (fxr->flags & FILE_XCHG_RANGE_SKIP_FILE1_HOLES)
+ req.req_flags |= XFS_SWAP_REQ_SKIP_FILE1_HOLES;
+
+ error = xfs_xchg_range_estimate(&req, &res);
+ if (error)
+ return error;
+
+retry:
+ /*
+ * Allocate the transaction, lock the inodes, and join them. This is
+ * also the restart point after a failed quota reservation.
+ */
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, res.resblks, 0,
+ XFS_TRANS_RES_FDBLKS, &tp);
+ if (error)
+ return error;
+
+ xfs_xchg_range_ilock(tp, ip1, ip2);
+
+ trace_xfs_swap_extent_before(ip2, 0);
+ trace_xfs_swap_extent_before(ip1, 1);
+
+ if (fxr->flags & FILE_XCHG_RANGE_FILE2_FRESH)
+ trace_xfs_xchg_range_freshness(ip2, fxr);
+
+ /*
+ * Now that we've excluded all other inode metadata changes by taking
+ * the ILOCK, repeat the freshness check.
+ */
+ error = generic_xchg_file_range_check_fresh(VFS_I(ip2), fxr);
+ if (error)
+ goto out_trans_cancel;
+
+ error = xfs_swapext_check_extents(mp, &req);
+ if (error)
+ goto out_trans_cancel;
+
+ /*
+ * Reserve ourselves some quota if any of them are in enforcing mode.
+ * In theory we only need enough to satisfy the change in the number
+ * of blocks between the two ranges being remapped.
+ *
+ * On the first EDQUOT/ENOSPC failure, cancel the transaction, drop the
+ * inode locks, call xfs_blockgc_free_quota() for whichever inodes the
+ * qretry mask names, and start over from the retry label.
+ */
+ error = xfs_xchg_range_reserve_quota(tp, &req, &res, &qretry);
+ if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
+ xfs_trans_cancel(tp);
+ xfs_xchg_range_iunlock(ip1, ip2);
+ if (qretry & QRETRY_IP1)
+ xfs_blockgc_free_quota(ip1, 0);
+ if (qretry & QRETRY_IP2)
+ xfs_blockgc_free_quota(ip2, 0);
+ retried = true;
+ goto retry;
+ }
+ if (error)
+ goto out_trans_cancel;
+
+ /*
+ * If we got this far on a dry run, all parameters are ok. error is
+ * zero here, so cancelling the transaction reports success.
+ */
+ if (fxr->flags & FILE_XCHG_RANGE_DRY_RUN)
+ goto out_trans_cancel;
+
+ /* Update the mtime and ctime of both files. */
+ if (xchg_flags & XFS_XCHG_RANGE_UPD_CMTIME1)
+ xfs_trans_ichgtime(tp, ip1,
+ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ if (xchg_flags & XFS_XCHG_RANGE_UPD_CMTIME2)
+ xfs_trans_ichgtime(tp, ip2,
+ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+ /* Exchange the file contents by swapping the block mappings. */
+ error = xfs_swapext(&tp, &req);
+ if (error)
+ goto out_trans_cancel;
+
+ /*
+ * If the caller wanted us to exchange the contents of two complete
+ * files of unequal length, exchange the incore sizes now. This should
+ * be safe because we flushed both files' page caches and moved all the
+ * post-eof extents, so there should not be anything to zero.
+ */
+ if (fxr->flags & FILE_XCHG_RANGE_TO_EOF) {
+ loff_t temp;
+
+ temp = i_size_read(VFS_I(ip2));
+ i_size_write(VFS_I(ip2), i_size_read(VFS_I(ip1)));
+ i_size_write(VFS_I(ip1), temp);
+ }
+
+ /* Relog the inodes to keep transactions moving forward. */
+ xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
+ xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE);
+
+ /*
+ * Force the log to persist metadata updates if the caller or the
+ * administrator requires this. The VFS prep function already flushed
+ * the relevant parts of the page cache.
+ */
+ if (xfs_has_wsync(mp) || (fxr->flags & FILE_XCHG_RANGE_FSYNC))
+ xfs_trans_set_sync(tp);
+
+ error = xfs_trans_commit(tp);
+
+ trace_xfs_swap_extent_after(ip2, 0);
+ trace_xfs_swap_extent_after(ip1, 1);
+
+out_unlock:
+ xfs_xchg_range_iunlock(ip1, ip2);
+ return error;
+
+out_trans_cancel:
+ xfs_trans_cancel(tp);
+ goto out_unlock;
+}
diff --git a/fs/xfs/xfs_xchgrange.h b/fs/xfs/xfs_xchgrange.h
index ddda2bfb6f4b..c1b4a1eec372 100644
--- a/fs/xfs/xfs_xchgrange.h
+++ b/fs/xfs/xfs_xchgrange.h
@@ -15,5 +15,24 @@ void xfs_xchg_range_iunlock(struct xfs_inode *ip1, struct xfs_inode *ip2);
int xfs_xchg_range_estimate(const struct xfs_swapext_req *req,
struct xfs_swapext_res *res);
+int xfs_xchg_range_prep(struct file *file1, struct file *file2,
+ struct file_xchg_range *fxr);
+
+int xfs_xchg_range_grab_log_assist(struct xfs_mount *mp, bool force,
+ bool *enabled);
+void xfs_xchg_range_rele_log_assist(struct xfs_mount *mp);
+
+/*
+ * Private flags for the xchg_flags argument of xfs_xchg_range().
+ * XCHG_RANGE_FLAGS_STRS below names them for tracepoint output.
+ *
+ * Update ip1's change and mod time.
+ */
+#define XFS_XCHG_RANGE_UPD_CMTIME1 (1 << 0)
+
+/* Update ip2's change and mod time. */
+#define XFS_XCHG_RANGE_UPD_CMTIME2 (1 << 1)
+
+#define XCHG_RANGE_FLAGS_STRS \
+ { XFS_XCHG_RANGE_UPD_CMTIME1, "UPD_CMTIME1" }, \
+ { XFS_XCHG_RANGE_UPD_CMTIME2, "UPD_CMTIME2" }
+
+int xfs_xchg_range(struct xfs_inode *ip1, struct xfs_inode *ip2,
+ const struct file_xchg_range *fxr, unsigned int xchg_flags);
#endif /* __XFS_XCHGRANGE_H__ */