From 04b38d601239b4d9be641b412cf4b7456a041c67 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Dec 2015 12:59:50 +0100 Subject: vfs: pull btrfs clone API to vfs layer The btrfs clone ioctls are now adopted by other file systems, with NFS and CIFS already having support for them, and XFS being under active development. To avoid growth of various slightly incompatible implementations, add one to the VFS. Note that clones are different from file copies in several ways: - they are atomic vs other writers - they support whole file clones - they support 64-bit length clones - they do not allow partial success (aka short writes) - clones are expected to be a fast metadata operation Because of that it would be rather cumbersome to try to piggyback them on top of the recent copy_file_range infrastructure. The converse isn't true and the copy_file_range system call could try clone file range as a first attempt to copy, something that further patches will enable. Based on earlier work from Peng Tao. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/ioctl.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'fs/ioctl.c') diff --git a/fs/ioctl.c b/fs/ioctl.c index 5d01d2638ca5..84c6e79829ab 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -215,6 +215,29 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) return error; } +static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, + u64 off, u64 olen, u64 destoff) +{ + struct fd src_file = fdget(srcfd); + int ret; + + if (!src_file.file) + return -EBADF; + ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); + fdput(src_file); + return ret; +} + +static long ioctl_file_clone_range(struct file *file, void __user *argp) +{ + struct file_clone_range args; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + return ioctl_file_clone(file, args.src_fd, args.src_offset, + args.src_length, args.dest_offset); +} + #ifdef CONFIG_BLOCK static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) @@ -600,6 +623,12 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, case FIGETBSZ: return put_user(inode->i_sb->s_blocksize, argp); + case FICLONE: + return ioctl_file_clone(filp, arg, 0, 0, 0); + + case FICLONERANGE: + return ioctl_file_clone_range(filp, argp); + default: if (S_ISREG(inode->i_mode)) error = file_ioctl(filp, cmd, arg); -- cgit v1.2.3 From 54dbc15172375641ef03399e8f911d7165eb90fb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 19 Dec 2015 00:55:59 -0800 Subject: vfs: hoist the btrfs deduplication ioctl to the vfs Hoist the btrfs EXTENT_SAME ioctl up to the VFS and make the name more systematic (FIDEDUPERANGE). Signed-off-by: Darrick J. 
Wong Signed-off-by: Al Viro --- fs/compat_ioctl.c | 1 + fs/ioctl.c | 38 ++++++++++++++++++ fs/read_write.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 4 ++ include/uapi/linux/fs.h | 30 +++++++++++++++ 5 files changed, 173 insertions(+) (limited to 'fs/ioctl.c') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 70d4b104c08d..eab31e74b9cc 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1582,6 +1582,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, case FICLONE: case FICLONERANGE: + case FIDEDUPERANGE: goto do_ioctl; case FIBMAP: diff --git a/fs/ioctl.c b/fs/ioctl.c index 84c6e79829ab..fcdd33b7ec78 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -568,6 +568,41 @@ static int ioctl_fsthaw(struct file *filp) return thaw_super(sb); } +static long ioctl_file_dedupe_range(struct file *file, void __user *arg) +{ + struct file_dedupe_range __user *argp = arg; + struct file_dedupe_range *same = NULL; + int ret; + unsigned long size; + u16 count; + + if (get_user(count, &argp->dest_count)) { + ret = -EFAULT; + goto out; + } + + size = offsetof(struct file_dedupe_range __user, info[count]); + + same = memdup_user(argp, size); + if (IS_ERR(same)) { + ret = PTR_ERR(same); + same = NULL; + goto out; + } + + ret = vfs_dedupe_file_range(file, same); + if (ret) + goto out; + + ret = copy_to_user(argp, same, size); + if (ret) + ret = -EFAULT; + +out: + kfree(same); + return ret; +} + /* * When you add any new common ioctls to the switches above and below * please update compat_sys_ioctl() too. 
@@ -629,6 +664,9 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, case FICLONERANGE: return ioctl_file_clone_range(filp, argp); + case FIDEDUPERANGE: + return ioctl_file_dedupe_range(filp, argp); + default: if (S_ISREG(inode->i_mode)) error = file_ioctl(filp, cmd, arg); diff --git a/fs/read_write.c b/fs/read_write.c index 60ee26941231..2116e74a83d3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1523,3 +1523,103 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, return ret; } EXPORT_SYMBOL(vfs_clone_file_range); + +int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) +{ + struct file_dedupe_range_info *info; + struct inode *src = file_inode(file); + u64 off; + u64 len; + int i; + int ret; + bool is_admin = capable(CAP_SYS_ADMIN); + u16 count = same->dest_count; + struct file *dst_file; + loff_t dst_off; + ssize_t deduped; + + if (!(file->f_mode & FMODE_READ)) + return -EINVAL; + + if (same->reserved1 || same->reserved2) + return -EINVAL; + + off = same->src_offset; + len = same->src_length; + + ret = -EISDIR; + if (S_ISDIR(src->i_mode)) + goto out; + + ret = -EINVAL; + if (!S_ISREG(src->i_mode)) + goto out; + + ret = clone_verify_area(file, off, len, false); + if (ret < 0) + goto out; + ret = 0; + + /* pre-format output fields to sane values */ + for (i = 0; i < count; i++) { + same->info[i].bytes_deduped = 0ULL; + same->info[i].status = FILE_DEDUPE_RANGE_SAME; + } + + for (i = 0, info = same->info; i < count; i++, info++) { + struct inode *dst; + struct fd dst_fd = fdget(info->dest_fd); + + dst_file = dst_fd.file; + if (!dst_file) { + info->status = -EBADF; + goto next_loop; + } + dst = file_inode(dst_file); + + ret = mnt_want_write_file(dst_file); + if (ret) { + info->status = ret; + goto next_loop; + } + + dst_off = info->dest_offset; + ret = clone_verify_area(dst_file, dst_off, len, true); + if (ret < 0) { + info->status = ret; + goto next_file; + } + ret = 0; + + if (info->reserved) { + 
info->status = -EINVAL; + } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { + info->status = -EINVAL; + } else if (file->f_path.mnt != dst_file->f_path.mnt) { + info->status = -EXDEV; + } else if (S_ISDIR(dst->i_mode)) { + info->status = -EISDIR; + } else if (dst_file->f_op->dedupe_file_range == NULL) { + info->status = -EINVAL; + } else { + deduped = dst_file->f_op->dedupe_file_range(file, off, + len, dst_file, + info->dest_offset); + if (deduped == -EBADE) + info->status = FILE_DEDUPE_RANGE_DIFFERS; + else if (deduped < 0) + info->status = deduped; + else + info->bytes_deduped += deduped; + } + +next_file: + mnt_drop_write_file(dst_file); +next_loop: + fdput(dst_fd); + } + +out: + return ret; +} +EXPORT_SYMBOL(vfs_dedupe_file_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5d987aefcf1e..d71814b81a3c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1633,6 +1633,8 @@ struct file_operations { loff_t, size_t, unsigned int); int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64); + ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *, + u64); }; struct inode_operations { @@ -1688,6 +1690,8 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); +extern int vfs_dedupe_file_range(struct file *file, + struct file_dedupe_range *same); struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index cd5db7fb3cb7..b38e647664a0 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -52,6 +52,35 @@ struct fstrim_range { __u64 minlen; }; +/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ +#define FILE_DEDUPE_RANGE_SAME 0 +#define FILE_DEDUPE_RANGE_DIFFERS 1 + +/* from struct 
btrfs_ioctl_file_extent_same_info */ +struct file_dedupe_range_info { + __s64 dest_fd; /* in - destination file */ + __u64 dest_offset; /* in - start of extent in destination */ + __u64 bytes_deduped; /* out - total # of bytes we were able + * to dedupe from this file. */ + /* status of this dedupe operation: + * < 0 for error + * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds + * == FILE_DEDUPE_RANGE_DIFFERS if data differs + */ + __s32 status; /* out - see above description */ + __u32 reserved; /* must be zero */ +}; + +/* from struct btrfs_ioctl_file_extent_same_args */ +struct file_dedupe_range { + __u64 src_offset; /* in - start of extent in source */ + __u64 src_length; /* in - length of extent */ + __u16 dest_count; /* in - total elements in info array */ + __u16 reserved1; /* must be zero */ + __u32 reserved2; /* must be zero */ + struct file_dedupe_range_info info[0]; +}; + /* And dynamically-tunable limits and defaults: */ struct files_stat_struct { unsigned long nr_files; /* read only */ @@ -168,6 +197,7 @@ struct inodes_stat_t { #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ #define FICLONE _IOW(0x94, 9, int) #define FICLONERANGE _IOW(0x94, 13, struct file_clone_range) +#define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range) #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_IOC_SETFLAGS _IOW('f', 2, long) -- cgit v1.2.3