From 6fcfc7df4c51118e5b57669c9c3d1ae235269c98 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 1 Sep 2021 10:46:56 -0700 Subject: vfs: introduce new file range exchange ioctl Introduce a new ioctl to handle swapping ranges of bytes between files. Signed-off-by: Darrick J. Wong --- include/linux/fs.h | 13 +++++- include/uapi/linux/fiexchange.h | 101 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/fiexchange.h (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bbf812ce89a8..bddc6746f656 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -44,6 +44,7 @@ #include #include +#include struct backing_dev_info; struct bdi_writeback; @@ -2106,6 +2107,8 @@ struct file_operations { loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); + int (*xchg_file_range)(struct file *file1, struct file *file2, + struct file_xchg_range *fsr); int (*fadvise)(struct file *, loff_t, loff_t, int); } __randomize_layout; @@ -2178,6 +2181,9 @@ extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); +extern int generic_xchg_file_range_prep(struct file *file1, struct file *file2, + struct file_xchg_range *fsr, + unsigned int blocksize); extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); @@ -2189,7 +2195,12 @@ extern int vfs_dedupe_file_range(struct file *file, extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, loff_t len, unsigned int remap_flags); - +extern int do_xchg_file_range(struct file *file1, struct file *file2, + struct file_xchg_range *fsr); +extern int vfs_xchg_file_range(struct file *file1, struct file 
*file2, + struct file_xchg_range *fsr); +extern int generic_xchg_file_range_check_fresh(struct inode *inode2, + const struct file_xchg_range *fsr); struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); diff --git a/include/uapi/linux/fiexchange.h b/include/uapi/linux/fiexchange.h new file mode 100644 index 000000000000..6e37dd4b5668 --- /dev/null +++ b/include/uapi/linux/fiexchange.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */ +/* + * FIEXCHANGE_RANGE ioctl definitions, to facilitate exchanging parts of files. + * + * Copyright (C) 2021 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong + */ +#ifndef _LINUX_FIEXCHANGE_H +#define _LINUX_FIEXCHANGE_H + +#include <linux/types.h> + +/* + * Exchange part of file1 with part of the file that this ioctl is being + * called against (which we'll call file2). Filesystems must be able to + * restart and complete the operation even after the system goes down. + */ +struct file_xchg_range { + __s64 file1_fd; + __s64 file1_offset; /* file1 offset, bytes */ + __s64 file2_offset; /* file2 offset, bytes */ + __u64 length; /* bytes to exchange */ + + __u64 flags; /* see FILE_XCHG_RANGE_* below */ + + /* file2 metadata for optional freshness checks */ + __s64 file2_ino; /* inode number */ + __s64 file2_mtime; /* modification time */ + __s64 file2_ctime; /* change time */ + __s32 file2_mtime_nsec; /* mod time, nsec */ + __s32 file2_ctime_nsec; /* change time, nsec */ + + __u64 pad[6]; /* must be zeroes */ +}; + +/* + * Atomic exchange operations are not required. This relaxes the requirement + * that the filesystem must be able to complete the operation after a crash. + */ +#define FILE_XCHG_RANGE_NONATOMIC (1 << 0) + +/* + * Check file2's inode number, mtime, and ctime against the values + * provided, and return -EBUSY if there isn't an exact match. 
+ */ +#define FILE_XCHG_RANGE_FILE2_FRESH (1 << 1) + +/* + * Check that file1's length is equal to file1_offset + length, and that + * file2's length is equal to file2_offset + length. Returns -EDOM if there + * isn't an exact match. + */ +#define FILE_XCHG_RANGE_FULL_FILES (1 << 2) + +/* + * Exchange file data all the way to the ends of both files, and then exchange + * the file sizes. This flag can be used to replace a file's contents with a + * different amount of data. length will be ignored. + */ +#define FILE_XCHG_RANGE_TO_EOF (1 << 3) + +/* Flush all changes in file data and file metadata to disk before returning. */ +#define FILE_XCHG_RANGE_FSYNC (1 << 4) + +/* Dry run; do all the parameter verification but do not change anything. */ +#define FILE_XCHG_RANGE_DRY_RUN (1 << 5) + +/* + * Do not exchange any part of the range where file1's mapping is a hole. This + * can be used to emulate scatter-gather atomic writes with a temp file. + */ +#define FILE_XCHG_RANGE_SKIP_FILE1_HOLES (1 << 6) + +/* + * Commit the contents of file1 into file2 if file2 has the same inode number, + * mtime, and ctime as the arguments provided to the call. The old contents of + * file2 will be moved to file1. + * + * With this flag, all committed information can be retrieved even if the + * system crashes or is rebooted. This includes writing through or flushing a + * disk cache if present. The call blocks until the device reports that the + * commit is complete. + * + * This flag should not be combined with NONATOMIC. It can be combined with + * SKIP_FILE1_HOLES. 
+ */ +#define FILE_XCHG_RANGE_COMMIT (FILE_XCHG_RANGE_FILE2_FRESH | \ + FILE_XCHG_RANGE_FSYNC) + +#define FILE_XCHG_RANGE_ALL_FLAGS (FILE_XCHG_RANGE_NONATOMIC | \ + FILE_XCHG_RANGE_FILE2_FRESH | \ + FILE_XCHG_RANGE_FULL_FILES | \ + FILE_XCHG_RANGE_TO_EOF | \ + FILE_XCHG_RANGE_FSYNC | \ + FILE_XCHG_RANGE_DRY_RUN | \ + FILE_XCHG_RANGE_SKIP_FILE1_HOLES) + +#define FIEXCHANGE_RANGE _IOWR('X', 129, struct file_xchg_range) + +#endif /* _LINUX_FIEXCHANGE_H */ -- cgit v1.2.3