From 3db11b2eecc02dc0eee943e71822c6d929281aa7 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Tue, 10 Nov 2015 16:53:32 -0500 Subject: btrfs: add .copy_file_range file operation This rearranges the existing CLONE_RANGE ioctl implementation so that the .copy_file_range file operation can call the core loop that copies file data extent items. The extent copying loop is lifted up into its own function. It retains the core btrfs error checks that should be shared. Signed-off-by: Zach Brown [Anna Schumaker: Make flags an unsigned int, Check for COPY_FR_REFLINK] Signed-off-by: Anna Schumaker Reviewed-by: Josef Bacik Reviewed-by: David Sterba Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/btrfs/ioctl.c | 91 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 39 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index da94138eb85e..0f92735299d3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3779,17 +3779,16 @@ out: return ret; } -static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, - u64 off, u64 olen, u64 destoff) +static noinline int btrfs_clone_files(struct file *file, struct file *file_src, + u64 off, u64 olen, u64 destoff) { struct inode *inode = file_inode(file); + struct inode *src = file_inode(file_src); struct btrfs_root *root = BTRFS_I(inode)->root; - struct fd src_file; - struct inode *src; int ret; u64 len = olen; u64 bs = root->fs_info->sb->s_blocksize; - int same_inode = 0; + int same_inode = src == inode; /* * TODO: @@ -3802,49 +3801,20 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, * be either compressed or non-compressed. 
*/ - /* the destination must be opened for writing */ - if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) - return -EINVAL; - if (btrfs_root_readonly(root)) return -EROFS; - ret = mnt_want_write_file(file); - if (ret) - return ret; - - src_file = fdget(srcfd); - if (!src_file.file) { - ret = -EBADF; - goto out_drop_write; - } - - ret = -EXDEV; - if (src_file.file->f_path.mnt != file->f_path.mnt) - goto out_fput; - - src = file_inode(src_file.file); - - ret = -EINVAL; - if (src == inode) - same_inode = 1; - - /* the src must be open for reading */ - if (!(src_file.file->f_mode & FMODE_READ)) - goto out_fput; + if (file_src->f_path.mnt != file->f_path.mnt || + src->i_sb != inode->i_sb) + return -EXDEV; /* don't make the dst file partly checksummed */ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) - goto out_fput; + return -EINVAL; - ret = -EISDIR; if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) - goto out_fput; - - ret = -EXDEV; - if (src->i_sb != inode->i_sb) - goto out_fput; + return -EISDIR; if (!same_inode) { btrfs_double_inode_lock(src, inode); @@ -3921,6 +3891,49 @@ out_unlock: btrfs_double_inode_unlock(src, inode); else mutex_unlock(&src->i_mutex); + return ret; +} + +ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + ssize_t ret; + + ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out); + if (ret == 0) + ret = len; + return ret; +} + +static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, + u64 off, u64 olen, u64 destoff) +{ + struct fd src_file; + int ret; + + /* the destination must be opened for writing */ + if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) + return -EINVAL; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + src_file = fdget(srcfd); + if (!src_file.file) { + ret = -EBADF; + goto out_drop_write; + } + + /* 
the src must be open for reading */ + if (!(src_file.file->f_mode & FMODE_READ)) { + ret = -EINVAL; + goto out_fput; + } + + ret = btrfs_clone_files(file, src_file.file, off, olen, destoff); + out_fput: fdput(src_file); out_drop_write: -- cgit v1.2.3 From 04b38d601239b4d9be641b412cf4b7456a041c67 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Dec 2015 12:59:50 +0100 Subject: vfs: pull btrfs clone API to vfs layer The btrfs clone ioctls are now adopted by other file systems, with NFS and CIFS already having support for them, and XFS being under active development. To avoid growth of various slightly incompatible implementations, add one to the VFS. Note that clones are different from file copies in several ways: - they are atomic vs other writers - they support whole file clones - they support 64-bit length clones - they do not allow partial success (aka short writes) - clones are expected to be a fast metadata operation Because of that it would be rather cumbersome to try to piggyback them on top of the recent copy_file_range infrastructure. The converse isn't true and the copy_file_range system call could try clone file range as a first attempt to copy, something that further patches will enable. Based on earlier work from Peng Tao. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/btrfs/ctree.h | 3 +- fs/btrfs/file.c | 1 + fs/btrfs/ioctl.c | 49 ++----------------- fs/cifs/cifsfs.c | 63 ++++++++++++++++++++++++ fs/cifs/cifsfs.h | 1 - fs/cifs/ioctl.c | 126 +++++++++++++++++++++++------------------------- fs/ioctl.c | 29 +++++++++++ fs/nfs/nfs4file.c | 87 ++++----------------------------- fs/read_write.c | 72 +++++++++++++++++++++++++++ include/linux/fs.h | 7 ++- include/uapi/linux/fs.h | 9 ++++ 11 files changed, 254 insertions(+), 193 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ede7277c167f..dd4733fa882c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4025,7 +4025,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list, void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, struct btrfs_ioctl_balance_args *bargs); - /* file.c */ int btrfs_auto_defrag_init(void); void btrfs_auto_defrag_exit(void); @@ -4058,6 +4057,8 @@ int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags); +int btrfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e67fe6ab8c9e..232e300a6c93 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2925,6 +2925,7 @@ const struct file_operations btrfs_file_operations = { .compat_ioctl = btrfs_ioctl, #endif .copy_file_range = btrfs_copy_file_range, + .clone_file_range = btrfs_clone_file_range, }; void btrfs_auto_defrag_exit(void) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0f92735299d3..85b1caeeec85 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3906,49 +3906,10 @@ ssize_t btrfs_copy_file_range(struct file *file_in, 
loff_t pos_in, return ret; } -static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, - u64 off, u64 olen, u64 destoff) +int btrfs_clone_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, u64 len) { - struct fd src_file; - int ret; - - /* the destination must be opened for writing */ - if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) - return -EINVAL; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - src_file = fdget(srcfd); - if (!src_file.file) { - ret = -EBADF; - goto out_drop_write; - } - - /* the src must be open for reading */ - if (!(src_file.file->f_mode & FMODE_READ)) { - ret = -EINVAL; - goto out_fput; - } - - ret = btrfs_clone_files(file, src_file.file, off, olen, destoff); - -out_fput: - fdput(src_file); -out_drop_write: - mnt_drop_write_file(file); - return ret; -} - -static long btrfs_ioctl_clone_range(struct file *file, void __user *argp) -{ - struct btrfs_ioctl_clone_range_args args; - - if (copy_from_user(&args, argp, sizeof(args))) - return -EFAULT; - return btrfs_ioctl_clone(file, args.src_fd, args.src_offset, - args.src_length, args.dest_offset); + return btrfs_clone_files(dst_file, src_file, off, len, destoff); } /* @@ -5498,10 +5459,6 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_dev_info(root, argp); case BTRFS_IOC_BALANCE: return btrfs_ioctl_balance(file, NULL); - case BTRFS_IOC_CLONE: - return btrfs_ioctl_clone(file, arg, 0, 0, 0); - case BTRFS_IOC_CLONE_RANGE: - return btrfs_ioctl_clone_range(file, argp); case BTRFS_IOC_TRANS_START: return btrfs_ioctl_trans_start(file); case BTRFS_IOC_TRANS_END: diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index cbc0f4bca0c0..e9b978f2e114 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -914,6 +914,61 @@ const struct inode_operations cifs_symlink_inode_ops = { #endif }; +static int cifs_clone_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, u64 
len) +{ + struct inode *src_inode = file_inode(src_file); + struct inode *target_inode = file_inode(dst_file); + struct cifsFileInfo *smb_file_src = src_file->private_data; + struct cifsFileInfo *smb_file_target = dst_file->private_data; + struct cifs_tcon *src_tcon = tlink_tcon(smb_file_src->tlink); + struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink); + unsigned int xid; + int rc; + + cifs_dbg(FYI, "clone range\n"); + + xid = get_xid(); + + if (!src_file->private_data || !dst_file->private_data) { + rc = -EBADF; + cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); + goto out; + } + + /* + * Note: cifs case is easier than btrfs since server responsible for + * checks for proper open modes and file type and if it wants + * server could even support copy of range where source = target + */ + lock_two_nondirectories(target_inode, src_inode); + + if (len == 0) + len = src_inode->i_size - off; + + cifs_dbg(FYI, "about to flush pages\n"); + /* should we flush first and last page first */ + truncate_inode_pages_range(&target_inode->i_data, destoff, + PAGE_CACHE_ALIGN(destoff + len)-1); + + if (target_tcon->ses->server->ops->duplicate_extents) + rc = target_tcon->ses->server->ops->duplicate_extents(xid, + smb_file_src, smb_file_target, off, len, destoff); + else + rc = -EOPNOTSUPP; + + /* force revalidate of size and timestamps of target file now + that target is updated on the server */ + CIFS_I(target_inode)->time = 0; +out_unlock: + /* although unlocking in the reverse order from locking is not + strictly necessary here it is a little cleaner to be consistent */ + unlock_two_nondirectories(src_inode, target_inode); +out: + free_xid(xid); + return rc; +} + const struct file_operations cifs_file_ops = { .read_iter = cifs_loose_read_iter, .write_iter = cifs_file_write_iter, @@ -926,6 +981,7 @@ const struct file_operations cifs_file_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + 
.clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -942,6 +998,8 @@ const struct file_operations cifs_file_strict_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + .clone_file_range = cifs_clone_file_range, + .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -958,6 +1016,7 @@ const struct file_operations cifs_file_direct_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .unlocked_ioctl = cifs_ioctl, + .clone_file_range = cifs_clone_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -974,6 +1033,7 @@ const struct file_operations cifs_file_nobrl_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -989,6 +1049,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1004,6 +1065,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .unlocked_ioctl = cifs_ioctl, + .clone_file_range = cifs_clone_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1014,6 +1076,7 @@ const struct file_operations cifs_dir_ops = { .release = cifs_closedir, .read = generic_read_dir, .unlocked_ioctl = cifs_ioctl, + .clone_file_range = cifs_clone_file_range, .llseek = generic_file_llseek, }; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index c3cc1609025f..c399513c3cbd 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -131,7 +131,6 @@ extern int cifs_setxattr(struct dentry *, const 
char *, const void *, extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); - #ifdef CONFIG_CIFS_NFSD_EXPORT extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 35cf990f87d3..7a3b84e300f8 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -34,73 +34,36 @@ #include "cifs_ioctl.h" #include -static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, - unsigned long srcfd, u64 off, u64 len, u64 destoff, - bool dup_extents) +static int cifs_file_clone_range(unsigned int xid, struct file *src_file, + struct file *dst_file) { - int rc; - struct cifsFileInfo *smb_file_target = dst_file->private_data; + struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); - struct cifs_tcon *target_tcon; - struct fd src_file; struct cifsFileInfo *smb_file_src; - struct inode *src_inode; + struct cifsFileInfo *smb_file_target; struct cifs_tcon *src_tcon; + struct cifs_tcon *target_tcon; + int rc; cifs_dbg(FYI, "ioctl clone range\n"); - /* the destination must be opened for writing */ - if (!(dst_file->f_mode & FMODE_WRITE)) { - cifs_dbg(FYI, "file target not open for write\n"); - return -EINVAL; - } - /* check if target volume is readonly and take reference */ - rc = mnt_want_write_file(dst_file); - if (rc) { - cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc); - return rc; - } - - src_file = fdget(srcfd); - if (!src_file.file) { - rc = -EBADF; - goto out_drop_write; - } - - if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) { - rc = -EBADF; - cifs_dbg(VFS, "src file seems to be from a different filesystem type\n"); - goto out_fput; - } - - if ((!src_file.file->private_data) || (!dst_file->private_data)) { + if (!src_file->private_data || !dst_file->private_data) { 
rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); - goto out_fput; + goto out; } rc = -EXDEV; smb_file_target = dst_file->private_data; - smb_file_src = src_file.file->private_data; + smb_file_src = src_file->private_data; src_tcon = tlink_tcon(smb_file_src->tlink); target_tcon = tlink_tcon(smb_file_target->tlink); - /* check source and target on same server (or volume if dup_extents) */ - if (dup_extents && (src_tcon != target_tcon)) { - cifs_dbg(VFS, "source and target of copy not on same share\n"); - goto out_fput; - } - - if (!dup_extents && (src_tcon->ses != target_tcon->ses)) { + if (src_tcon->ses != target_tcon->ses) { cifs_dbg(VFS, "source and target of copy not on same server\n"); - goto out_fput; + goto out; } - src_inode = file_inode(src_file.file); - rc = -EINVAL; - if (S_ISDIR(src_inode->i_mode)) - goto out_fput; - /* * Note: cifs case is easier than btrfs since server responsible for * checks for proper open modes and file type and if it wants @@ -108,34 +71,66 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, */ lock_two_nondirectories(target_inode, src_inode); - /* determine range to clone */ - rc = -EINVAL; - if (off + len > src_inode->i_size || off + len < off) - goto out_unlock; - if (len == 0) - len = src_inode->i_size - off; - cifs_dbg(FYI, "about to flush pages\n"); /* should we flush first and last page first */ - truncate_inode_pages_range(&target_inode->i_data, destoff, - PAGE_CACHE_ALIGN(destoff + len)-1); + truncate_inode_pages(&target_inode->i_data, 0); - if (dup_extents && target_tcon->ses->server->ops->duplicate_extents) - rc = target_tcon->ses->server->ops->duplicate_extents(xid, - smb_file_src, smb_file_target, off, len, destoff); - else if (!dup_extents && target_tcon->ses->server->ops->clone_range) + if (target_tcon->ses->server->ops->clone_range) rc = target_tcon->ses->server->ops->clone_range(xid, - smb_file_src, smb_file_target, off, len, destoff); + smb_file_src, 
smb_file_target, 0, src_inode->i_size, 0); else rc = -EOPNOTSUPP; /* force revalidate of size and timestamps of target file now that target is updated on the server */ CIFS_I(target_inode)->time = 0; -out_unlock: /* although unlocking in the reverse order from locking is not strictly necessary here it is a little cleaner to be consistent */ unlock_two_nondirectories(src_inode, target_inode); +out: + return rc; +} + +static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, + unsigned long srcfd) +{ + int rc; + struct fd src_file; + struct inode *src_inode; + + cifs_dbg(FYI, "ioctl clone range\n"); + /* the destination must be opened for writing */ + if (!(dst_file->f_mode & FMODE_WRITE)) { + cifs_dbg(FYI, "file target not open for write\n"); + return -EINVAL; + } + + /* check if target volume is readonly and take reference */ + rc = mnt_want_write_file(dst_file); + if (rc) { + cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc); + return rc; + } + + src_file = fdget(srcfd); + if (!src_file.file) { + rc = -EBADF; + goto out_drop_write; + } + + if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) { + rc = -EBADF; + cifs_dbg(VFS, "src file seems to be from a different filesystem type\n"); + goto out_fput; + } + + src_inode = file_inode(src_file.file); + rc = -EINVAL; + if (S_ISDIR(src_inode->i_mode)) + goto out_fput; + + rc = cifs_file_clone_range(xid, src_file.file, dst_file); + out_fput: fdput(src_file); out_drop_write: @@ -256,10 +251,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) } break; case CIFS_IOC_COPYCHUNK_FILE: - rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false); - break; - case BTRFS_IOC_CLONE: - rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true); + rc = cifs_ioctl_clone(xid, filep, arg); break; case CIFS_IOC_SET_INTEGRITY: if (pSMBFile == NULL) diff --git a/fs/ioctl.c b/fs/ioctl.c index 5d01d2638ca5..84c6e79829ab 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -215,6 +215,29 @@ static int 
ioctl_fiemap(struct file *filp, unsigned long arg) return error; } +static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, + u64 off, u64 olen, u64 destoff) +{ + struct fd src_file = fdget(srcfd); + int ret; + + if (!src_file.file) + return -EBADF; + ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); + fdput(src_file); + return ret; +} + +static long ioctl_file_clone_range(struct file *file, void __user *argp) +{ + struct file_clone_range args; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + return ioctl_file_clone(file, args.src_fd, args.src_offset, + args.src_length, args.dest_offset); +} + #ifdef CONFIG_BLOCK static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) @@ -600,6 +623,12 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, case FIGETBSZ: return put_user(inode->i_sb->s_blocksize, argp); + case FICLONE: + return ioctl_file_clone(filp, arg, 0, 0, 0); + + case FICLONERANGE: + return ioctl_file_clone_range(filp, argp); + default: if (S_ISREG(inode->i_mode)) error = file_ioctl(filp, cmd, arg); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index db9b5fea5b3e..26f9a23e2b25 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -195,65 +195,27 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t return nfs42_proc_allocate(filep, offset, len); } -static noinline long -nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, - u64 src_off, u64 dst_off, u64 count) +static int nfs42_clone_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, u64 count) { struct inode *dst_inode = file_inode(dst_file); struct nfs_server *server = NFS_SERVER(dst_inode); - struct fd src_file; - struct inode *src_inode; + struct inode *src_inode = file_inode(src_file); unsigned int bs = server->clone_blksize; bool same_inode = false; int ret; - /* dst file must be opened for writing */ - if 
(!(dst_file->f_mode & FMODE_WRITE)) - return -EINVAL; - - ret = mnt_want_write_file(dst_file); - if (ret) - return ret; - - src_file = fdget(srcfd); - if (!src_file.file) { - ret = -EBADF; - goto out_drop_write; - } - - src_inode = file_inode(src_file.file); - - if (src_inode == dst_inode) - same_inode = true; - - /* src file must be opened for reading */ - if (!(src_file.file->f_mode & FMODE_READ)) - goto out_fput; - - /* src and dst must be regular files */ - ret = -EISDIR; - if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode)) - goto out_fput; - - ret = -EXDEV; - if (src_file.file->f_path.mnt != dst_file->f_path.mnt || - src_inode->i_sb != dst_inode->i_sb) - goto out_fput; - /* check alignment w.r.t. clone_blksize */ ret = -EINVAL; if (bs) { if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs)) - goto out_fput; + goto out; if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count)) - goto out_fput; + goto out; } - /* verify if ranges are overlapped within the same file */ - if (same_inode) { - if (dst_off + count > src_off && dst_off < src_off + count) - goto out_fput; - } + if (src_inode == dst_inode) + same_inode = true; /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? 
*/ if (same_inode) { @@ -275,7 +237,7 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, if (ret) goto out_unlock; - ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count); + ret = nfs42_proc_clone(src_file, dst_file, src_off, dst_off, count); /* truncate inode page cache of the dst range so that future reads can fetch * new data from server */ @@ -292,37 +254,9 @@ out_unlock: mutex_unlock(&dst_inode->i_mutex); mutex_unlock(&src_inode->i_mutex); } -out_fput: - fdput(src_file); -out_drop_write: - mnt_drop_write_file(dst_file); +out: return ret; } - -static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) -{ - struct btrfs_ioctl_clone_range_args args; - - if (copy_from_user(&args, argp, sizeof(args))) - return -EFAULT; - - return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset, - args.dest_offset, args.src_length); -} - -long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - - switch (cmd) { - case BTRFS_IOC_CLONE: - return nfs42_ioctl_clone(file, arg, 0, 0, 0); - case BTRFS_IOC_CLONE_RANGE: - return nfs42_ioctl_clone_range(file, argp); - } - - return -ENOTTY; -} #endif /* CONFIG_NFS_V4_2 */ const struct file_operations nfs4_file_operations = { @@ -342,8 +276,7 @@ const struct file_operations nfs4_file_operations = { #ifdef CONFIG_NFS_V4_2 .llseek = nfs4_file_llseek, .fallocate = nfs42_fallocate, - .unlocked_ioctl = nfs4_ioctl, - .compat_ioctl = nfs4_ioctl, + .clone_file_range = nfs42_clone_file_range, #else .llseek = nfs_file_llseek, #endif diff --git a/fs/read_write.c b/fs/read_write.c index 6cfad4761fd8..c75d02cb13ec 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1451,3 +1451,75 @@ out1: out2: return ret; } + +static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) +{ + struct inode *inode = file_inode(file); + + if (unlikely(pos < 0)) + return -EINVAL; + + if (unlikely((loff_t) (pos + len) < 0)) + return 
-EINVAL; + + if (unlikely(inode->i_flctx && mandatory_lock(inode))) { + loff_t end = len ? pos + len - 1 : OFFSET_MAX; + int retval; + + retval = locks_mandatory_area(inode, file, pos, end, + write ? F_WRLCK : F_RDLCK); + if (retval < 0) + return retval; + } + + return security_file_permission(file, write ? MAY_WRITE : MAY_READ); +} + +int vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) +{ + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + int ret; + + if (inode_in->i_sb != inode_out->i_sb || + file_in->f_path.mnt != file_out->f_path.mnt) + return -EXDEV; + + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EOPNOTSUPP; + + if (!(file_in->f_mode & FMODE_READ) || + !(file_out->f_mode & FMODE_WRITE) || + (file_out->f_flags & O_APPEND) || + !file_in->f_op->clone_file_range) + return -EBADF; + + ret = clone_verify_area(file_in, pos_in, len, false); + if (ret) + return ret; + + ret = clone_verify_area(file_out, pos_out, len, true); + if (ret) + return ret; + + if (pos_in + len > i_size_read(inode_in)) + return -EINVAL; + + ret = mnt_want_write_file(file_out); + if (ret) + return ret; + + ret = file_in->f_op->clone_file_range(file_in, pos_in, + file_out, pos_out, len); + if (!ret) { + fsnotify_access(file_in); + fsnotify_modify(file_out); + } + + mnt_drop_write_file(file_out); + return ret; +} +EXPORT_SYMBOL(vfs_clone_file_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index 4377b2df991d..5d987aefcf1e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1629,7 +1629,10 @@ struct file_operations { #ifndef CONFIG_MMU unsigned (*mmap_capabilities)(struct file *); #endif - ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); + ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, + loff_t, 
size_t, unsigned int); + int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, + u64); }; struct inode_operations { @@ -1683,6 +1686,8 @@ extern ssize_t vfs_writev(struct file *, const struct iovec __user *, unsigned long, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); +extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len); struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index f15d980249b5..cd5db7fb3cb7 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -39,6 +39,13 @@ #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ +struct file_clone_range { + __s64 src_fd; + __u64 src_offset; + __u64 src_length; + __u64 dest_offset; +}; + struct fstrim_range { __u64 start; __u64 len; @@ -159,6 +166,8 @@ struct inodes_stat_t { #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ #define FITHAW _IOWR('X', 120, int) /* Thaw */ #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ +#define FICLONE _IOW(0x94, 9, int) +#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range) #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_IOC_SETFLAGS _IOW('f', 2, long) -- cgit v1.2.3 From 2b3909f8a7fe94e0234850aa9d120cca15b6e1f7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 19 Dec 2015 00:56:05 -0800 Subject: btrfs: use new dedupe data function pointer Now that the VFS encapsulates the dedupe ioctl, wire up btrfs to it. Signed-off-by: Darrick J. 
Wong Signed-off-by: Al Viro --- fs/btrfs/ctree.h | 2 + fs/btrfs/file.c | 1 + fs/btrfs/ioctl.c | 110 +++++++------------------------------------------------ 3 files changed, 16 insertions(+), 97 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index dd4733fa882c..b7e4e344e8e0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4024,6 +4024,8 @@ void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, struct btrfs_ioctl_balance_args *bargs); +ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, + struct file *dst_file, u64 dst_loff); /* file.c */ int btrfs_auto_defrag_init(void); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 232e300a6c93..d012e0a96ec3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2926,6 +2926,7 @@ const struct file_operations btrfs_file_operations = { #endif .copy_file_range = btrfs_copy_file_range, .clone_file_range = btrfs_clone_file_range, + .dedupe_file_range = btrfs_dedupe_file_range, }; void btrfs_auto_defrag_exit(void) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 85b1caeeec85..e21997385d14 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2962,7 +2962,7 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, flush_dcache_page(dst_page); if (memcmp(addr, dst_addr, cmp_len)) - ret = BTRFS_SAME_DATA_DIFFERS; + ret = -EBADE; kunmap_atomic(addr); kunmap_atomic(dst_addr); @@ -3098,53 +3098,16 @@ out_unlock: #define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) -static long btrfs_ioctl_file_extent_same(struct file *file, - struct btrfs_ioctl_same_args __user *argp) +ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, + struct file *dst_file, u64 dst_loff) { - struct btrfs_ioctl_same_args *same = NULL; - struct btrfs_ioctl_same_extent_info *info; - struct inode *src = file_inode(file); - u64 off; 
- u64 len; - int i; - int ret; - unsigned long size; + struct inode *src = file_inode(src_file); + struct inode *dst = file_inode(dst_file); u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; - bool is_admin = capable(CAP_SYS_ADMIN); - u16 count; - - if (!(file->f_mode & FMODE_READ)) - return -EINVAL; + ssize_t res; - ret = mnt_want_write_file(file); - if (ret) - return ret; - - if (get_user(count, &argp->dest_count)) { - ret = -EFAULT; - goto out; - } - - size = offsetof(struct btrfs_ioctl_same_args __user, info[count]); - - same = memdup_user(argp, size); - - if (IS_ERR(same)) { - ret = PTR_ERR(same); - same = NULL; - goto out; - } - - off = same->logical_offset; - len = same->length; - - /* - * Limit the total length we will dedupe for each operation. - * This is intended to bound the total time spent in this - * ioctl to something sane. - */ - if (len > BTRFS_MAX_DEDUPE_LEN) - len = BTRFS_MAX_DEDUPE_LEN; + if (olen > BTRFS_MAX_DEDUPE_LEN) + olen = BTRFS_MAX_DEDUPE_LEN; if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { /* @@ -3152,58 +3115,13 @@ static long btrfs_ioctl_file_extent_same(struct file *file, * result, btrfs_cmp_data() won't correctly handle * this situation without an update. 
*/ - ret = -EINVAL; - goto out; - } - - ret = -EISDIR; - if (S_ISDIR(src->i_mode)) - goto out; - - ret = -EACCES; - if (!S_ISREG(src->i_mode)) - goto out; - - /* pre-format output fields to sane values */ - for (i = 0; i < count; i++) { - same->info[i].bytes_deduped = 0ULL; - same->info[i].status = 0; - } - - for (i = 0, info = same->info; i < count; i++, info++) { - struct inode *dst; - struct fd dst_file = fdget(info->fd); - if (!dst_file.file) { - info->status = -EBADF; - continue; - } - dst = file_inode(dst_file.file); - - if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) { - info->status = -EINVAL; - } else if (file->f_path.mnt != dst_file.file->f_path.mnt) { - info->status = -EXDEV; - } else if (S_ISDIR(dst->i_mode)) { - info->status = -EISDIR; - } else if (!S_ISREG(dst->i_mode)) { - info->status = -EACCES; - } else { - info->status = btrfs_extent_same(src, off, len, dst, - info->logical_offset); - if (info->status == 0) - info->bytes_deduped += len; - } - fdput(dst_file); + return -EINVAL; } - ret = copy_to_user(argp, same, size); - if (ret) - ret = -EFAULT; - -out: - mnt_drop_write_file(file); - kfree(same); - return ret; + res = btrfs_extent_same(src, loff, olen, dst, dst_loff); + if (res) + return res; + return olen; } static int clone_finish_inode_update(struct btrfs_trans_handle *trans, @@ -5536,8 +5454,6 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_get_fslabel(file, argp); case BTRFS_IOC_SET_FSLABEL: return btrfs_ioctl_set_fslabel(file, argp); - case BTRFS_IOC_FILE_EXTENT_SAME: - return btrfs_ioctl_file_extent_same(file, argp); case BTRFS_IOC_GET_SUPPORTED_FEATURES: return btrfs_ioctl_get_supported_features(file, argp); case BTRFS_IOC_GET_FEATURES: -- cgit v1.2.3