summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-09-01 10:46:56 -0700
committerDarrick J. Wong <djwong@kernel.org>2021-12-15 17:28:58 -0800
commit6fcfc7df4c51118e5b57669c9c3d1ae235269c98 (patch)
treeca48f7c1df44fd3649683e53230eb2a543114b89 /include
parent0cf987c916e8390c67d7231aafdb78f1bb73b078 (diff)
vfs: introduce new file range exchange ioctl
Introduce a new ioctl to handle swapping ranges of bytes between files. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/fs.h13
-rw-r--r--include/uapi/linux/fiexchange.h101
2 files changed, 113 insertions, 1 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bbf812ce89a8..bddc6746f656 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -44,6 +44,7 @@
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
+#include <uapi/linux/fiexchange.h>
struct backing_dev_info;
struct bdi_writeback;
@@ -2106,6 +2107,8 @@ struct file_operations {
loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
+ int (*xchg_file_range)(struct file *file1, struct file *file2,
+ struct file_xchg_range *fsr);
int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;
@@ -2178,6 +2181,9 @@ extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t *count,
unsigned int remap_flags);
+extern int generic_xchg_file_range_prep(struct file *file1, struct file *file2,
+ struct file_xchg_range *fsr,
+ unsigned int blocksize);
extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
@@ -2189,7 +2195,12 @@ extern int vfs_dedupe_file_range(struct file *file,
extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
struct file *dst_file, loff_t dst_pos,
loff_t len, unsigned int remap_flags);
-
+extern int do_xchg_file_range(struct file *file1, struct file *file2,
+ struct file_xchg_range *fsr);
+extern int vfs_xchg_file_range(struct file *file1, struct file *file2,
+ struct file_xchg_range *fsr);
+extern int generic_xchg_file_range_check_fresh(struct inode *inode2,
+ const struct file_xchg_range *fsr);
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
diff --git a/include/uapi/linux/fiexchange.h b/include/uapi/linux/fiexchange.h
new file mode 100644
index 000000000000..6e37dd4b5668
--- /dev/null
+++ b/include/uapi/linux/fiexchange.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */
+/*
+ * FIEXCHANGE_RANGE ioctl definitions, to facilitate exchanging parts of files.
+ *
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef _LINUX_FIEXCHANGE_H
+#define _LINUX_FIEXCHANGE_H
+
+#include <linux/types.h>
+
+/*
+ * Exchange part of file1 with part of the file that this ioctl is being
+ * called against (which we'll call file2). Filesystems must be able to
+ * restart and complete the operation even after the system goes down.
+ */
+struct file_xchg_range {
+ __s64 file1_fd; /* NOTE(review): presumably file1's descriptor; confirm */
+ __s64 file1_offset; /* file1 offset, bytes */
+ __s64 file2_offset; /* file2 offset, bytes */
+ __u64 length; /* bytes to exchange; ignored with FILE_XCHG_RANGE_TO_EOF */
+
+ __u64 flags; /* see FILE_XCHG_RANGE_* below */
+
+ /* file2 metadata for optional freshness checks (FILE_XCHG_RANGE_FILE2_FRESH) */
+ __s64 file2_ino; /* inode number */
+ __s64 file2_mtime; /* modification time; nsec part is file2_mtime_nsec */
+ __s64 file2_ctime; /* change time; nsec part is file2_ctime_nsec */
+ __s32 file2_mtime_nsec; /* mod time, nsec */
+ __s32 file2_ctime_nsec; /* change time, nsec */
+
+ __u64 pad[6]; /* must be zeroes */
+};
+
+/*
+ * Atomic exchange operations are not required. This relaxes the requirement
+ * that the filesystem must be able to complete the operation after a crash.
+ */
+#define FILE_XCHG_RANGE_NONATOMIC (1 << 0)
+
+/*
+ * Check file2's inode number, mtime, and ctime against the values
+ * provided, and return -EBUSY if there isn't an exact match.
+ */
+#define FILE_XCHG_RANGE_FILE2_FRESH (1 << 1)
+
+/*
+ * Check that file1's length is equal to file1_offset + length, and that
+ * file2's length is equal to file2_offset + length. Returns -EDOM if there
+ * isn't an exact match.
+ */
+#define FILE_XCHG_RANGE_FULL_FILES (1 << 2)
+
+/*
+ * Exchange file data all the way to the ends of both files, and then exchange
+ * the file sizes. This flag can be used to replace a file's contents with a
+ * different amount of data. The length field will be ignored.
+ */
+#define FILE_XCHG_RANGE_TO_EOF (1 << 3)
+
+/* Flush all changes in file data and file metadata to disk before returning. */
+#define FILE_XCHG_RANGE_FSYNC (1 << 4)
+
+/* Dry run; do all the parameter verification but do not change anything. */
+#define FILE_XCHG_RANGE_DRY_RUN (1 << 5)
+
+/*
+ * Do not exchange any part of the range where file1's mapping is a hole. This
+ * can be used to emulate scatter-gather atomic writes with a temp file.
+ */
+#define FILE_XCHG_RANGE_SKIP_FILE1_HOLES (1 << 6)
+
+/*
+ * Commit the contents of file1 into file2 if file2 has the same inode number,
+ * mtime, and ctime as the arguments provided to the call. The old contents of
+ * file2 will be moved to file1.
+ *
+ * With this flag, all committed information can be retrieved even if the
+ * system crashes or is rebooted. This includes writing through or flushing a
+ * disk cache if present. The call blocks until the device reports that the
+ * commit is complete.
+ *
+ * This flag should not be combined with NONATOMIC. It can be combined with
+ * SKIP_FILE1_HOLES.
+ */
+#define FILE_XCHG_RANGE_COMMIT (FILE_XCHG_RANGE_FILE2_FRESH | \
+ FILE_XCHG_RANGE_FSYNC)
+
+#define FILE_XCHG_RANGE_ALL_FLAGS (FILE_XCHG_RANGE_NONATOMIC | \
+ FILE_XCHG_RANGE_FILE2_FRESH | \
+ FILE_XCHG_RANGE_FULL_FILES | \
+ FILE_XCHG_RANGE_TO_EOF | \
+ FILE_XCHG_RANGE_FSYNC | \
+ FILE_XCHG_RANGE_DRY_RUN | \
+ FILE_XCHG_RANGE_SKIP_FILE1_HOLES)
+
+#define FIEXCHANGE_RANGE _IOWR('X', 129, struct file_xchg_range)
+
+#endif /* _LINUX_FIEXCHANGE_H */