summaryrefslogtreecommitdiff
path: root/fs/remap_range.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/remap_range.c')
-rw-r--r--fs/remap_range.c296
1 files changed, 296 insertions, 0 deletions
diff --git a/fs/remap_range.c b/fs/remap_range.c
index 654912d06862..e88ee9ad87b2 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -570,3 +570,299 @@ next_loop:
return ret;
}
EXPORT_SYMBOL(vfs_dedupe_file_range);
+
+/* Performs necessary checks before doing a range exchange. */
+static int generic_xchg_file_range_checks(struct file *file1,
+ struct file *file2,
+ struct file_xchg_range *fxr,
+ unsigned int blocksize)
+{
+ struct inode *inode1 = file1->f_mapping->host;
+ struct inode *inode2 = file2->f_mapping->host;
+ uint64_t blkmask = blocksize - 1;
+ int64_t test_len;
+ uint64_t blen;
+ loff_t size1, size2;
+ int ret;
+
+ /* Don't touch certain kinds of inodes */
+ if (IS_IMMUTABLE(inode1) || IS_IMMUTABLE(inode2))
+ return -EPERM;
+ if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
+ return -ETXTBSY;
+
+ size1 = i_size_read(inode1);
+ size2 = i_size_read(inode2);
+
+ /* Ranges cannot start after EOF. */
+ if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
+ return -EINVAL;
+
+ /*
+ * If the caller asked for full files, check that the offset/length
+ * values cover all of both files.
+ */
+ if ((fxr->flags & FILE_XCHG_RANGE_FULL_FILES) &&
+ (fxr->file1_offset != 0 || fxr->file2_offset != 0 ||
+ fxr->length != size1 || fxr->length != size2))
+ return -EDOM;
+
+ /*
+ * If the caller said to exchange to EOF, we set the length of the
+ * request large enough to cover everything to the end of both files.
+ */
+ if (fxr->flags & FILE_XCHG_RANGE_TO_EOF)
+ fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
+ size2 - fxr->file2_offset);
+
+ /* The start of both ranges must be aligned to an fs block. */
+ if (!IS_ALIGNED(fxr->file1_offset, blocksize) ||
+ !IS_ALIGNED(fxr->file2_offset, blocksize))
+ return -EINVAL;
+
+ /* Ensure offsets don't wrap. */
+ if (fxr->file1_offset + fxr->length < fxr->file1_offset ||
+ fxr->file2_offset + fxr->length < fxr->file2_offset)
+ return -EINVAL;
+
+ /*
+ * We require both ranges to be within EOF, unless we're exchanging
+ * to EOF. generic_xchg_range_prep already checked that both
+ * fxr->file1_offset and fxr->file2_offset are within EOF.
+ */
+ if (!(fxr->flags & FILE_XCHG_RANGE_TO_EOF) &&
+ (fxr->file1_offset + fxr->length > size1 ||
+ fxr->file2_offset + fxr->length > size2))
+ return -EINVAL;
+
+ /*
+ * Make sure we don't hit any file size limits. If we hit any size
+ * limits such that test_length was adjusted, we abort the whole
+ * operation.
+ */
+ test_len = fxr->length;
+ ret = generic_write_check_limits(file2, fxr->file2_offset, &test_len);
+ if (ret)
+ return ret;
+ ret = generic_write_check_limits(file1, fxr->file1_offset, &test_len);
+ if (ret)
+ return ret;
+ if (test_len != fxr->length)
+ return -EINVAL;
+
+ /*
+ * If the user wanted us to exchange up to the infile's EOF, round up
+ * to the next block boundary for this check. Do the same for the
+ * outfile.
+ *
+ * Otherwise, reject the range length if it's not block aligned. We
+ * already confirmed the starting offsets' block alignment.
+ */
+ if (fxr->file1_offset + fxr->length == size1)
+ blen = ALIGN(size1, blocksize) - fxr->file1_offset;
+ else if (fxr->file2_offset + fxr->length == size2)
+ blen = ALIGN(size2, blocksize) - fxr->file2_offset;
+ else if (!IS_ALIGNED(fxr->length, blocksize))
+ return -EINVAL;
+ else
+ blen = fxr->length;
+
+ /* Don't allow overlapped exchanges within the same file. */
+ if (inode1 == inode2 &&
+ fxr->file2_offset + blen > fxr->file1_offset &&
+ fxr->file1_offset + blen > fxr->file2_offset)
+ return -EINVAL;
+
+ /* If we already failed the freshness check, we're done. */
+ ret = generic_xchg_file_range_check_fresh(inode2, fxr);
+ if (ret)
+ return ret;
+
+ /*
+ * Ensure that we don't exchange a partial EOF block into the middle of
+ * another file.
+ */
+ if ((fxr->length & blkmask) == 0)
+ return 0;
+
+ blen = fxr->length;
+ if (fxr->file2_offset + blen < size2)
+ blen &= ~blkmask;
+
+ if (fxr->file1_offset + blen < size1)
+ blen &= ~blkmask;
+
+ return blen == fxr->length ? 0 : -EINVAL;
+}
+
+/*
+ * Check that the two inodes are eligible for range exchanges, the ranges make
+ * sense, and then flush all dirty data. Caller must ensure that the inodes
+ * have been locked against any other modifications.
+ */
+int generic_xchg_file_range_prep(struct file *file1, struct file *file2,
+ struct file_xchg_range *fxr,
+ unsigned int blocksize)
+{
+ struct inode *inode1 = file_inode(file1);
+ struct inode *inode2 = file_inode(file2);
+ bool same_inode = (inode1 == inode2);
+ int ret;
+
+ /* Check that we don't violate system file offset limits. */
+ ret = generic_xchg_file_range_checks(file1, file2, fxr, blocksize);
+ if (ret || fxr->length == 0)
+ return ret;
+
+ /* Wait for the completion of any pending IOs on both files */
+ inode_dio_wait(inode1);
+ if (!same_inode)
+ inode_dio_wait(inode2);
+
+ ret = filemap_write_and_wait_range(inode1->i_mapping, fxr->file1_offset,
+ fxr->file1_offset + fxr->length - 1);
+ if (ret)
+ return ret;
+
+ ret = filemap_write_and_wait_range(inode2->i_mapping, fxr->file2_offset,
+ fxr->file2_offset + fxr->length - 1);
+ if (ret)
+ return ret;
+
+ /*
+ * If the files or inodes involved require synchronous writes, amend
+ * the request to force the filesystem to flush all data and metadata
+ * to disk after the operation completes.
+ */
+ if (((file1->f_flags | file2->f_flags) & (__O_SYNC | O_DSYNC)) ||
+ IS_SYNC(inode1) || IS_SYNC(inode2))
+ fxr->flags |= FILE_XCHG_RANGE_FSYNC;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(generic_xchg_file_range_prep);
+
+/*
+ * Finish a range exchange operation, if it was successful. Caller must ensure
+ * that the inodes are still locked against any other modifications.
+ */
+int generic_xchg_file_range_finish(struct file *file1, struct file *file2)
+{
+ int ret;
+
+ ret = file_remove_privs(file1);
+ if (ret)
+ return ret;
+ if (file_inode(file1) == file_inode(file2))
+ return 0;
+
+ return file_remove_privs(file2);
+}
+EXPORT_SYMBOL_GPL(generic_xchg_file_range_finish);
+
+/*
+ * Check that both files' metadata agree with the snapshot that we took for
+ * the range exchange request.
+
+ * This should be called after the filesystem has locked /all/ inode metadata
+ * against modification.
+ */
+int generic_xchg_file_range_check_fresh(struct inode *inode2,
+ const struct file_xchg_range *fxr)
+{
+ /* Check that file2 hasn't otherwise been modified. */
+ if ((fxr->flags & FILE_XCHG_RANGE_FILE2_FRESH) &&
+ (fxr->file2_ino != inode2->i_ino ||
+ fxr->file2_ctime != inode2->i_ctime.tv_sec ||
+ fxr->file2_ctime_nsec != inode2->i_ctime.tv_nsec ||
+ fxr->file2_mtime != inode2->i_mtime.tv_sec ||
+ fxr->file2_mtime_nsec != inode2->i_mtime.tv_nsec))
+ return -EBUSY;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(generic_xchg_file_range_check_fresh);
+
+static inline int xchg_range_verify_area(struct file *file, loff_t pos,
+ struct file_xchg_range *fxr)
+{
+ int64_t len = fxr->length;
+
+ if (pos < 0)
+ return -EINVAL;
+
+ if (fxr->flags & FILE_XCHG_RANGE_TO_EOF)
+ len = min_t(int64_t, len, i_size_read(file_inode(file)) - pos);
+ return remap_verify_area(file, pos, len, true);
+}
+
+int do_xchg_file_range(struct file *file1, struct file *file2,
+ struct file_xchg_range *fxr)
+{
+ struct inode *inode1 = file_inode(file1);
+ struct inode *inode2 = file_inode(file2);
+ int ret;
+
+ if ((fxr->flags & ~FILE_XCHG_RANGE_ALL_FLAGS) ||
+ memchr_inv(&fxr->pad, 0, sizeof(fxr->pad)))
+ return -EINVAL;
+
+ if ((fxr->flags & FILE_XCHG_RANGE_FULL_FILES) &&
+ (fxr->flags & FILE_XCHG_RANGE_TO_EOF))
+ return -EINVAL;
+
+ /*
+ * The ioctl enforces that src and dest files are on the same mount.
+ * Practically, they only need to be on the same file system.
+ */
+ if (inode1->i_sb != inode2->i_sb)
+ return -EXDEV;
+
+ /* This only works for regular files. */
+ if (S_ISDIR(inode1->i_mode) || S_ISDIR(inode2->i_mode))
+ return -EISDIR;
+ if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
+ return -EINVAL;
+
+ ret = generic_file_rw_checks(file1, file2);
+ if (ret < 0)
+ return ret;
+
+ ret = generic_file_rw_checks(file2, file1);
+ if (ret < 0)
+ return ret;
+
+ if (!file1->f_op->xchg_file_range)
+ return -EOPNOTSUPP;
+
+ ret = xchg_range_verify_area(file1, fxr->file1_offset, fxr);
+ if (ret)
+ return ret;
+
+ ret = xchg_range_verify_area(file2, fxr->file2_offset, fxr);
+ if (ret)
+ return ret;
+
+ ret = file2->f_op->xchg_file_range(file1, file2, fxr);
+ if (ret)
+ return ret;
+
+ fsnotify_modify(file1);
+ if (file2 != file1)
+ fsnotify_modify(file2);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(do_xchg_file_range);
+
+int vfs_xchg_file_range(struct file *file1, struct file *file2,
+ struct file_xchg_range *fxr)
+{
+ int ret;
+
+ file_start_write(file2);
+ ret = do_xchg_file_range(file1, file2, fxr);
+ file_end_write(file2);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vfs_xchg_file_range);