From f16acc9d9b3761ae5e45219c9302f99e20919829 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 5 Jun 2019 08:04:47 -0700 Subject: vfs: introduce generic_copy_file_range() Right now if vfs_copy_file_range() does not use any offload mechanism, it falls back to calling do_splice_direct(). This fails to do basic sanity checks on the files being copied. Before we start adding this necessarily functionality to the fallback path, separate it out into generic_copy_file_range(). generic_copy_file_range() has the same prototype as ->copy_file_range() so that filesystems can use it in their custom ->copy_file_range() method if they so choose. Signed-off-by: Dave Chinner Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/read_write.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index c543d965e288..676b02fae589 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1565,6 +1565,36 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, } #endif +/** + * generic_copy_file_range - copy data between two files + * @file_in: file structure to read from + * @pos_in: file offset to read from + * @file_out: file structure to write data to + * @pos_out: file offset to write data to + * @len: amount of data to copy + * @flags: copy flags + * + * This is a generic filesystem helper to copy data from one file to another. + * It has no constraints on the source or destination file owners - the files + * can belong to different superblocks and different filesystem types. Short + * copies are allowed. + * + * This should be called from the @file_out filesystem, as per the + * ->copy_file_range() method. + * + * Returns the number of bytes copied or a negative error indicating the + * failure. + */ + +ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + return do_splice_direct(file_in, &pos_in, file_out, &pos_out, + len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); +} +EXPORT_SYMBOL(generic_copy_file_range); + /* * copy_file_range() differs from regular file read and write in that it * specifically allows return partial success. When it does so is up to @@ -1632,9 +1662,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, goto done; } - ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, - len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); - + ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); done: if (ret > 0) { fsnotify_access(file_in); -- cgit v1.2.3 From 64bf5ff58dff757253cf2142542672de4b21cd1a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 5 Jun 2019 08:04:47 -0700 Subject: vfs: no fallback for ->copy_file_range Now that we have generic_copy_file_range(), remove it as a fallback case when offloads fail. This puts the responsibility for executing fallbacks on the filesystems that implement ->copy_file_range and allows us to add operational validity checks to generic_copy_file_range(). Rework vfs_copy_file_range() to call a new do_copy_file_range() helper to execute the copying callout, and move calls to generic_file_copy_range() into filesystem methods where they currently return failures. [Amir] overlayfs is not responsible of executing the fallback. It is the responsibility of the underlying filesystem. Signed-off-by: Dave Chinner Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/ceph/file.c | 21 ++++++++++++++++++--- fs/cifs/cifsfs.c | 4 ++++ fs/fuse/file.c | 21 ++++++++++++++++++--- fs/nfs/nfs4file.c | 20 +++++++++++++++++--- fs/read_write.c | 25 ++++++++++++++++--------- 5 files changed, 73 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 183c37c0a8fc..31d450536175 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1889,9 +1889,9 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, return 0; } -static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, - struct file *dst_file, loff_t dst_off, - size_t len, unsigned int flags) +static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, + size_t len, unsigned int flags) { struct inode *src_inode = file_inode(src_file); struct inode *dst_inode = file_inode(dst_file); @@ -2100,6 +2100,21 @@ out: return ret; } +static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, + size_t len, unsigned int flags) +{ + ssize_t ret; + + ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off, + len, flags); + + if (ret == -EOPNOTSUPP) + ret = generic_copy_file_range(src_file, src_off, dst_file, + dst_off, len, flags); + return ret; +} + const struct file_operations ceph_file_fops = { .open = ceph_open, .release = ceph_release, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f5fcd6360056..c65823270313 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1148,6 +1148,10 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, len, flags); free_xid(xid); + + if (rc == -EOPNOTSUPP) + rc = generic_copy_file_range(src_file, off, dst_file, + destoff, len, flags); return rc; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b8f9c83835d5..4c20bf61d9c3 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3112,9 +3112,9 @@ out: return err; } -static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags) +static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) { struct fuse_file *ff_in = file_in->private_data; struct fuse_file *ff_out = file_out->private_data; @@ -3194,6 +3194,21 @@ out: return err; } +static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, + size_t len, unsigned int flags) +{ + ssize_t ret; + + ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, + len, flags); + + if (ret == -EOPNOTSUPP) + ret = generic_copy_file_range(src_file, src_off, dst_file, + dst_off, len, flags); + return ret; +} + static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, .read_iter = fuse_file_read_iter, diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index cf42a8b939e3..4842f3ab3161 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -129,9 +129,9 @@ nfs4_file_flush(struct file *file, fl_owner_t id) } #ifdef CONFIG_NFS_V4_2 -static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t count, unsigned int flags) +static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t count, unsigned int flags) { if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) return -EOPNOTSUPP; @@ -140,6 +140,20 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); } +static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t count, unsigned int flags) +{ + ssize_t ret; + + ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count, + flags); + if (ret == -EOPNOTSUPP) + ret = generic_copy_file_range(file_in, pos_in, file_out, + pos_out, count, flags); + return ret; +} + static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) { loff_t ret; diff --git a/fs/read_write.c b/fs/read_write.c index 676b02fae589..b63dcb4e4fe9 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1595,6 +1595,19 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(generic_copy_file_range); +static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + if (file_out->f_op->copy_file_range) + return file_out->f_op->copy_file_range(file_in, pos_in, + file_out, pos_out, + len, flags); + + return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); +} + /* * copy_file_range() differs from regular file read and write in that it * specifically allows return partial success. When it does so is up to @@ -1655,15 +1668,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, } } - if (file_out->f_op->copy_file_range) { - ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, - pos_out, len, flags); - if (ret != -EOPNOTSUPP) - goto done; - } - - ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, - flags); + ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); + WARN_ON_ONCE(ret == -EOPNOTSUPP); done: if (ret > 0) { fsnotify_access(file_in); -- cgit v1.2.3 From a31713517dac0862a3f0ec9006df9160ce022b0c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 5 Jun 2019 08:04:48 -0700 Subject: vfs: introduce generic_file_rw_checks() Factor out helper with some checks on in/out file that are common to clone_file_range and copy_file_range. Suggested-by: Darrick J. Wong Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/read_write.c | 38 +++++++++++--------------------------- include/linux/fs.h | 1 + mm/filemap.c | 24 ++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index b63dcb4e4fe9..f1900bdb3127 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1617,17 +1617,18 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); ssize_t ret; if (flags != 0) return -EINVAL; - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; + /* this could be relaxed once a method supports cross-fs copies */ + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + return -EXDEV; + + ret = generic_file_rw_checks(file_in, file_out); + if (unlikely(ret)) + return ret; ret = rw_verify_area(READ, file_in, &pos_in, len); if (unlikely(ret)) @@ -1637,15 +1638,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (unlikely(ret)) return ret; - if (!(file_in->f_mode & FMODE_READ) || - !(file_out->f_mode & FMODE_WRITE) || - (file_out->f_flags & O_APPEND)) - return -EBADF; - - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - if (len == 0) return 0; @@ -2013,29 +2005,21 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags) { - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); loff_t ret; WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP); - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; - /* * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on * the same mount. Practically, they only need to be on the same file * system. */ - if (inode_in->i_sb != inode_out->i_sb) + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; - if (!(file_in->f_mode & FMODE_READ) || - !(file_out->f_mode & FMODE_WRITE) || - (file_out->f_flags & O_APPEND)) - return -EBADF; + ret = generic_file_rw_checks(file_in, file_out); + if (ret < 0) + return ret; if (!file_in->f_op->remap_file_range) return -EOPNOTSUPP; diff --git a/include/linux/fs.h b/include/linux/fs.h index ea17858310ff..89b9b73eb581 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3049,6 +3049,7 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); +extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index df2006ba0cfa..a38619a4a6af 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3041,6 +3041,30 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in, return 0; } + +/* + * Performs common checks before doing a file copy/clone + * from @file_in to @file_out. + */ +int generic_file_rw_checks(struct file *file_in, struct file *file_out) +{ + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + + /* Don't copy dirs, pipes, sockets... */ + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + + if (!(file_in->f_mode & FMODE_READ) || + !(file_out->f_mode & FMODE_WRITE) || + (file_out->f_flags & O_APPEND)) + return -EBADF; + + return 0; +} + int pagecache_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) -- cgit v1.2.3 From 96e6e8f4a68df2d94800311163faa67124df24e5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 5 Jun 2019 08:04:49 -0700 Subject: vfs: add missing checks to copy_file_range Like the clone and dedupe interfaces we've recently fixed, the copy_file_range() implementation is missing basic sanity, limits and boundary condition tests on the parameters that are passed to it from userspace. Create a new "generic_copy_file_checks()" function modelled on the generic_remap_checks() function to provide this missing functionality. [Amir] Shorten copy length instead of checking pos_in limits because input file size already abides by the limits. Signed-off-by: Dave Chinner Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/read_write.c | 3 ++- include/linux/fs.h | 3 +++ mm/filemap.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index f1900bdb3127..b0fb1176b628 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1626,7 +1626,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; - ret = generic_file_rw_checks(file_in, file_out); + ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, + flags); if (unlikely(ret)) return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index 89b9b73eb581..e4d382c4342a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3050,6 +3050,9 @@ extern int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); +extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t *count, unsigned int flags); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index 44361928bbb0..aac71aef4c61 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3056,6 +3056,59 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) return 0; } +/* + * Performs necessary checks before doing a file copy + * + * Can adjust amount of bytes to copy via @req_count argument. + * Returns appropriate error code that caller should return or + * zero in case the copy should be allowed. + */ +int generic_copy_file_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t *req_count, unsigned int flags) +{ + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + uint64_t count = *req_count; + loff_t size_in; + int ret; + + ret = generic_file_rw_checks(file_in, file_out); + if (ret) + return ret; + + /* Don't touch certain kinds of inodes */ + if (IS_IMMUTABLE(inode_out)) + return -EPERM; + + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) + return -ETXTBSY; + + /* Ensure offsets don't wrap. */ + if (pos_in + count < pos_in || pos_out + count < pos_out) + return -EOVERFLOW; + + /* Shorten the copy to EOF */ + size_in = i_size_read(inode_in); + if (pos_in >= size_in) + count = 0; + else + count = min(count, size_in - (uint64_t)pos_in); + + ret = generic_write_check_limits(file_out, pos_out, &count); + if (ret) + return ret; + + /* Don't allow overlapped copying within the same file. */ + if (inode_in == inode_out && + pos_out + count > pos_in && + pos_out < pos_in + count) + return -EINVAL; + + *req_count = count; + return 0; +} + int pagecache_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) -- cgit v1.2.3 From e38f7f53c35213b1cbce70eee5de7ced17f40d4a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 5 Jun 2019 08:04:49 -0700 Subject: vfs: introduce file_modified() helper The combination of file_remove_privs() and file_update_mtime() is quite common in filesystem ->write_iter() methods. Modelled after the helper file_accessed(), introduce file_modified() and use it from generic_remap_file_range_prep(). Note that the order of calling file_remove_privs() before file_update_mtime() in the helper was matched to the more common order by filesystems and not the current order in generic_remap_file_range_prep(). Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/inode.c | 20 ++++++++++++++++++++ fs/read_write.c | 21 +++------------------ include/linux/fs.h | 2 ++ 3 files changed, 25 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index df6542ec3b88..4348cfb14562 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1899,6 +1899,26 @@ int file_update_time(struct file *file) } EXPORT_SYMBOL(file_update_time); +/* Caller must hold the file's inode lock */ +int file_modified(struct file *file) +{ + int err; + + /* + * Clear the security bits if the process is not being run by root. + * This keeps people from modifying setuid and setgid binaries. + */ + err = file_remove_privs(file); + if (err) + return err; + + if (unlikely(file->f_mode & FMODE_NOCMTIME)) + return 0; + + return file_update_time(file); +} +EXPORT_SYMBOL(file_modified); + int inode_needs_sync(struct inode *inode) { if (IS_SYNC(inode)) diff --git a/fs/read_write.c b/fs/read_write.c index b0fb1176b628..cec7e7b1f693 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1980,25 +1980,10 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, return ret; /* If can't alter the file contents, we're done. */ - if (!(remap_flags & REMAP_FILE_DEDUP)) { - /* Update the timestamps, since we can alter file contents. */ - if (!(file_out->f_mode & FMODE_NOCMTIME)) { - ret = file_update_time(file_out); - if (ret) - return ret; - } + if (!(remap_flags & REMAP_FILE_DEDUP)) + ret = file_modified(file_out); - /* - * Clear the security bits if the process is not being run by - * root. This keeps people from modifying setuid and setgid - * binaries. - */ - ret = file_remove_privs(file_out); - if (ret) - return ret; - } - - return 0; + return ret; } EXPORT_SYMBOL(generic_remap_file_range_prep); diff --git a/include/linux/fs.h b/include/linux/fs.h index e4d382c4342a..79ffa2958bd8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2177,6 +2177,8 @@ static inline void file_accessed(struct file *file) touch_atime(&file->f_path); } +extern int file_modified(struct file *file); + int sync_inode(struct inode *inode, struct writeback_control *wbc); int sync_inode_metadata(struct inode *inode, int wait); -- cgit v1.2.3 From 8c3f406c097b83846c7d18438a905b49d17ae528 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 5 Jun 2019 08:04:50 -0700 Subject: xfs: use file_modified() helper Note that by using the helper, the order of calling file_remove_privs() after file_update_mtime() in xfs_file_aio_write_checks() has changed. Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 76748255f843..916a35cae5e9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -367,20 +367,7 @@ restart: * lock above. Eventually we should look into a way to avoid * the pointless lock roundtrip. */ - if (likely(!(file->f_mode & FMODE_NOCMTIME))) { - error = file_update_time(file); - if (error) - return error; - } - - /* - * If we're writing the file then make sure to clear the setuid and - * setgid bits if the process is not being run by root. This keeps - * people from modifying setuid and setgid binaries. - */ - if (!IS_NOSEC(inode)) - return file_remove_privs(file); - return 0; + return file_modified(file); } static int -- cgit v1.2.3 From 5dae222a5ff0c269730393018a5539cc970a4726 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 5 Jun 2019 08:04:50 -0700 Subject: vfs: allow copy_file_range to copy across devices We want to enable cross-filesystem copy_file_range functionality where possible, so push the "same superblock only" checks down to the individual filesystem callouts so they can make their own decisions about cross-superblock copy offload and fallack to generic_copy_file_range() for cross-superblock copy. [Amir] We do not call ->remap_file_range() in case the files are not on the same sb and do not call ->copy_file_range() in case the files do not belong to the same filesystem driver. This changes behavior of the copy_file_range(2) syscall, which will now allow cross filesystem in-kernel copy. CIFS already supports cross-superblock copy, between two shares to the same server. This functionality will now be available via the copy_file_range(2) syscall. Cc: Steve French Signed-off-by: Dave Chinner Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/ceph/file.c | 4 +++- fs/cifs/cifsfs.c | 2 +- fs/fuse/file.c | 5 ++++- fs/nfs/nfs4file.c | 5 ++++- fs/read_write.c | 18 ++++++++++++------ 5 files changed, 24 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 31d450536175..c5517ffeb11c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1909,6 +1909,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, if (src_inode == dst_inode) return -EINVAL; + if (src_inode->i_sb != dst_inode->i_sb) + return -EXDEV; if (ceph_snap(dst_inode) != CEPH_NOSNAP) return -EROFS; @@ -2109,7 +2111,7 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off, len, flags); - if (ret == -EOPNOTSUPP) + if (ret == -EOPNOTSUPP || ret == -EXDEV) ret = generic_copy_file_range(src_file, src_off, dst_file, dst_off, len, flags); return ret; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c65823270313..f11eea6125c1 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1149,7 +1149,7 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, len, flags); free_xid(xid); - if (rc == -EOPNOTSUPP) + if (rc == -EOPNOTSUPP || rc == -EXDEV) rc = generic_copy_file_range(src_file, off, dst_file, destoff, len, flags); return rc; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4c20bf61d9c3..dc342edb399b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3142,6 +3142,9 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (fc->no_copy_file_range) return -EOPNOTSUPP; + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + return -EXDEV; + if (fc->writeback_cache) { inode_lock(inode_in); err = fuse_writeback_range(inode_in, pos_in, pos_in + len); @@ -3203,7 +3206,7 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, len, flags); - if (ret == -EOPNOTSUPP) + if (ret == -EOPNOTSUPP || ret == -EXDEV) ret = generic_copy_file_range(src_file, src_off, dst_file, dst_off, len, flags); return ret; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 4842f3ab3161..f4157eb1f69d 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -133,6 +133,9 @@ static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t count, unsigned int flags) { + /* Only offload copy if superblock is the same */ + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + return -EXDEV; if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) @@ -148,7 +151,7 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count, flags); - if (ret == -EOPNOTSUPP) + if (ret == -EOPNOTSUPP || ret == -EXDEV) ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, count, flags); return ret; diff --git a/fs/read_write.c b/fs/read_write.c index cec7e7b1f693..1f5088dec566 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1599,7 +1599,16 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { - if (file_out->f_op->copy_file_range) + /* + * Although we now allow filesystems to handle cross sb copy, passing + * a file of the wrong filesystem type to filesystem driver can result + * in an attempt to dereference the wrong type of ->private_data, so + * avoid doing that until we really have a good reason. NFS defines + * several different file_system_type structures, but they all end up + * using the same ->copy_file_range() function pointer. + */ + if (file_out->f_op->copy_file_range && + file_out->f_op->copy_file_range == file_in->f_op->copy_file_range) return file_out->f_op->copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); @@ -1622,10 +1631,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (flags != 0) return -EINVAL; - /* this could be relaxed once a method supports cross-fs copies */ - if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) - return -EXDEV; - ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, flags); if (unlikely(ret)) @@ -1648,7 +1653,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). */ - if (file_in->f_op->remap_file_range) { + if (file_in->f_op->remap_file_range && + file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { loff_t cloned; cloned = file_in->f_op->remap_file_range(file_in, pos_in, -- cgit v1.2.3 From fe0da9c09b2dc448ff781d1426ecb36d145ce51b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 5 Jun 2019 08:04:51 -0700 Subject: fuse: copy_file_range needs to strip setuid bits and update timestamps Like ->write_iter(), we update mtime and strip setuid of dst file before copy and like ->read_iter(), we update atime of src file after copy. Signed-off-by: Amir Goldstein Acked-by: Miklos Szeredi Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/fuse/file.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index dc342edb399b..5ae2828beb00 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3155,6 +3155,10 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, inode_lock(inode_out); + err = file_modified(file_out); + if (err) + goto out; + if (fc->writeback_cache) { err = fuse_writeback_range(inode_out, pos_out, pos_out + len); if (err) @@ -3193,6 +3197,7 @@ out: clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); inode_unlock(inode_out); + file_accessed(file_in); return err; } -- cgit v1.2.3