From 1be7f9be0efa4e90547f50b8188f4e70710a1173 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 20 Oct 2016 15:41:48 +1100 Subject: xfs: Fix uninitialized variable in xfs_reflink_reserve_cow_range() with gcc 4.1.2: fs/xfs/xfs_reflink.c: In function xfs_reflink_reserve_cow_range: fs/xfs/xfs_reflink.c:327: warning: error may be used uninitialized in this function Indeed, if "count" is zero, the function will return an uninitialized error value. While "count" is unlikely to be zero, this function is called through the public iomap API. Hence fix this by preinitializing error to zero. Fixes: 2a06705cd5954030 ("xfs: create delalloc extents in CoW fork") Signed-off-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 5965e9455d91..d48a7cc2fe00 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -324,7 +324,7 @@ xfs_reflink_reserve_cow_range( struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb, end_fsb; bool skipped = false; - int error; + int error = 0; trace_xfs_reflink_reserve_cow_range(ip, offset, count); -- cgit v1.2.3 From 576177818e6f1e65f6109ed4a8fae8b60131c861 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:49:19 +1100 Subject: xfs: move inode locking from xfs_reflink_remap_range to xfs_file_share_range We need the iolock protection to stabilizie the IS_SWAPFILE and IS_IMMUTABLE values, as well as preventing new buffered writers re-dirtying the file data that we just wrote out. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 62 ++++++++++++++++++++++++++++++++++------------------ fs/xfs/xfs_reflink.c | 15 ------------- 2 files changed, 41 insertions(+), 36 deletions(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index d5b835e82b2d..663761edd778 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -958,38 +958,54 @@ xfs_file_share_range( inode_out = file_inode(file_out); bs = inode_out->i_sb->s_blocksize; + /* Lock both files against IO */ + same_inode = (inode_in == inode_out); + if (same_inode) { + xfs_ilock(XFS_I(inode_in), XFS_IOLOCK_EXCL); + xfs_ilock(XFS_I(inode_in), XFS_MMAPLOCK_EXCL); + } else { + xfs_lock_two_inodes(XFS_I(inode_in), XFS_I(inode_out), + XFS_IOLOCK_EXCL); + xfs_lock_two_inodes(XFS_I(inode_in), XFS_I(inode_out), + XFS_MMAPLOCK_EXCL); + } + /* Don't touch certain kinds of inodes */ + ret = -EPERM; if (IS_IMMUTABLE(inode_out)) - return -EPERM; - if (IS_SWAPFILE(inode_in) || - IS_SWAPFILE(inode_out)) - return -ETXTBSY; - - same_inode = (inode_in == inode_out); + goto out_unlock; + ret = -ETXTBSY; + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) + goto out_unlock; /* Don't reflink dirs, pipes, sockets... */ + ret = -EISDIR; if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; + goto out_unlock; + ret = -EINVAL; if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) - return -EINVAL; + goto out_unlock; if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; + goto out_unlock; /* Don't share DAX file data for now. */ if (IS_DAX(inode_in) || IS_DAX(inode_out)) - return -EINVAL; + goto out_unlock; /* Are we going all the way to the end? */ isize = i_size_read(inode_in); - if (isize == 0) - return 0; + if (isize == 0) { + ret = 0; + goto out_unlock; + } + if (len == 0) len = isize - pos_in; /* Ensure offsets don't wrap and the input is inside i_size */ if (pos_in + len < pos_in || pos_out + len < pos_out || pos_in + len > isize) - return -EINVAL; + goto out_unlock; /* Don't allow dedupe past EOF in the dest file */ if (is_dedupe) { @@ -997,7 +1013,7 @@ xfs_file_share_range( disize = i_size_read(inode_out); if (pos_out >= disize || pos_out + len > disize) - return -EINVAL; + goto out_unlock; } /* If we're linking to EOF, continue to the block boundary. */ @@ -1009,28 +1025,32 @@ xfs_file_share_range( /* Only reflink if we're aligned to block boundaries */ if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) - return -EINVAL; + goto out_unlock; /* Don't allow overlapped reflink within the same file */ if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen) - return -EINVAL; + goto out_unlock; /* Wait for the completion of any pending IOs on srcfile */ ret = xfs_file_wait_for_io(inode_in, pos_in, len); if (ret) - goto out; + goto out_unlock; ret = xfs_file_wait_for_io(inode_out, pos_out, len); if (ret) - goto out; + goto out_unlock; if (is_dedupe) flags |= XFS_REFLINK_DEDUPE; ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out), pos_out, len, flags); - if (ret < 0) - goto out; -out: +out_unlock: + xfs_iunlock(XFS_I(inode_in), XFS_MMAPLOCK_EXCL); + xfs_iunlock(XFS_I(inode_in), XFS_IOLOCK_EXCL); + if (!same_inode) { + xfs_iunlock(XFS_I(inode_out), XFS_MMAPLOCK_EXCL); + xfs_iunlock(XFS_I(inode_out), XFS_IOLOCK_EXCL); + } return ret; } diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index d48a7cc2fe00..3b1c1a6bb5da 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1341,15 +1341,6 @@ xfs_reflink_remap_range( trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); - /* Lock both files against IO */ - if (src->i_ino == dest->i_ino) { - xfs_ilock(src, XFS_IOLOCK_EXCL); - xfs_ilock(src, XFS_MMAPLOCK_EXCL); - } else { - xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL); - xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); - } - /* * Check that the extents are the same. */ @@ -1401,12 +1392,6 @@ xfs_reflink_remap_range( goto out_error; out_error: - xfs_iunlock(src, XFS_MMAPLOCK_EXCL); - xfs_iunlock(src, XFS_IOLOCK_EXCL); - if (src->i_ino != dest->i_ino) { - xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); - xfs_iunlock(dest, XFS_IOLOCK_EXCL); - } if (error) trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); return error; -- cgit v1.2.3 From 5faaf4fa0a20d38edc4df57baf24ea35b7e91178 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:50:07 +1100 Subject: xfs: merge xfs_reflink_remap_range and xfs_file_share_range There is no clear division of responsibility between those functions, so just merge them into one to keep the code simple. Also move xfs_file_wait_for_io to xfs_reflink.c together with its only caller. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 132 +------------------------------------- fs/xfs/xfs_reflink.c | 178 +++++++++++++++++++++++++++++++++++++++------------ fs/xfs/xfs_reflink.h | 7 +- 3 files changed, 143 insertions(+), 174 deletions(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 93729752bccb..6e4f7f900fea 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -909,132 +909,6 @@ out_unlock: return error; } -/* Hook up to the VFS reflink function */ -STATIC int -xfs_file_share_range( - struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len, - bool is_dedupe) -{ - struct inode *inode_in; - struct inode *inode_out; - ssize_t ret; - loff_t bs; - loff_t isize; - int same_inode; - loff_t blen; - unsigned int flags = 0; - - inode_in = file_inode(file_in); - inode_out = file_inode(file_out); - bs = inode_out->i_sb->s_blocksize; - - /* Lock both files against IO */ - same_inode = (inode_in == inode_out); - if (same_inode) { - xfs_ilock(XFS_I(inode_in), XFS_IOLOCK_EXCL); - xfs_ilock(XFS_I(inode_in), XFS_MMAPLOCK_EXCL); - } else { - xfs_lock_two_inodes(XFS_I(inode_in), XFS_I(inode_out), - XFS_IOLOCK_EXCL); - xfs_lock_two_inodes(XFS_I(inode_in), XFS_I(inode_out), - XFS_MMAPLOCK_EXCL); - } - - /* Don't touch certain kinds of inodes */ - ret = -EPERM; - if (IS_IMMUTABLE(inode_out)) - goto out_unlock; - ret = -ETXTBSY; - if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) - goto out_unlock; - - /* Don't reflink dirs, pipes, sockets... */ - ret = -EISDIR; - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - goto out_unlock; - ret = -EINVAL; - if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) - goto out_unlock; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - goto out_unlock; - - /* Don't share DAX file data for now. */ - if (IS_DAX(inode_in) || IS_DAX(inode_out)) - goto out_unlock; - - /* Are we going all the way to the end? */ - isize = i_size_read(inode_in); - if (isize == 0) { - ret = 0; - goto out_unlock; - } - - if (len == 0) - len = isize - pos_in; - - /* Ensure offsets don't wrap and the input is inside i_size */ - if (pos_in + len < pos_in || pos_out + len < pos_out || - pos_in + len > isize) - goto out_unlock; - - /* Don't allow dedupe past EOF in the dest file */ - if (is_dedupe) { - loff_t disize; - - disize = i_size_read(inode_out); - if (pos_out >= disize || pos_out + len > disize) - goto out_unlock; - } - - /* If we're linking to EOF, continue to the block boundary. */ - if (pos_in + len == isize) - blen = ALIGN(isize, bs) - pos_in; - else - blen = len; - - /* Only reflink if we're aligned to block boundaries */ - if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || - !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) - goto out_unlock; - - /* Don't allow overlapped reflink within the same file */ - if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen) - goto out_unlock; - - /* Wait for the completion of any pending IOs on both files */ - inode_dio_wait(inode_in); - if (!same_inode) - inode_dio_wait(inode_out); - - ret = filemap_write_and_wait_range(inode_in->i_mapping, - pos_in, pos_in + len - 1); - if (ret) - goto out_unlock; - - ret = filemap_write_and_wait_range(inode_out->i_mapping, - pos_out, pos_out + len - 1); - if (ret) - goto out_unlock; - - if (is_dedupe) - flags |= XFS_REFLINK_DEDUPE; - ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out), - pos_out, len, flags); - -out_unlock: - xfs_iunlock(XFS_I(inode_in), XFS_MMAPLOCK_EXCL); - xfs_iunlock(XFS_I(inode_in), XFS_IOLOCK_EXCL); - if (!same_inode) { - xfs_iunlock(XFS_I(inode_out), XFS_MMAPLOCK_EXCL); - xfs_iunlock(XFS_I(inode_out), XFS_IOLOCK_EXCL); - } - return ret; -} - STATIC ssize_t xfs_file_copy_range( struct file *file_in, @@ -1046,7 +920,7 @@ xfs_file_copy_range( { int error; - error = xfs_file_share_range(file_in, pos_in, file_out, pos_out, + error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, len, false); if (error) return error; @@ -1061,7 +935,7 @@ xfs_file_clone_range( loff_t pos_out, u64 len) { - return xfs_file_share_range(file_in, pos_in, file_out, pos_out, + return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, len, false); } @@ -1084,7 +958,7 @@ xfs_file_dedupe_range( if (len > XFS_MAX_DEDUPE_LEN) len = XFS_MAX_DEDUPE_LEN; - error = xfs_file_share_range(src_file, loff, dst_file, dst_loff, + error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff, len, true); if (error) return error; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 3b1c1a6bb5da..6592daa833a4 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1312,19 +1312,26 @@ out_error: */ int xfs_reflink_remap_range( - struct xfs_inode *src, - xfs_off_t srcoff, - struct xfs_inode *dest, - xfs_off_t destoff, - xfs_off_t len, - unsigned int flags) + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len, + bool is_dedupe) { + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); struct xfs_mount *mp = src->i_mount; + loff_t bs = inode_out->i_sb->s_blocksize; + bool same_inode = (inode_in == inode_out); xfs_fileoff_t sfsbno, dfsbno; xfs_filblks_t fsblen; - int error; xfs_extlen_t cowextsize; - bool is_same; + loff_t isize; + ssize_t ret; + loff_t blen; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return -EOPNOTSUPP; @@ -1332,48 +1339,135 @@ xfs_reflink_remap_range( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; + /* Lock both files against IO */ + if (same_inode) { + xfs_ilock(src, XFS_IOLOCK_EXCL); + xfs_ilock(src, XFS_MMAPLOCK_EXCL); + } else { + xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL); + xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); + } + + /* Don't touch certain kinds of inodes */ + ret = -EPERM; + if (IS_IMMUTABLE(inode_out)) + goto out_unlock; + + ret = -ETXTBSY; + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) + goto out_unlock; + + + /* Don't reflink dirs, pipes, sockets... */ + ret = -EISDIR; + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + goto out_unlock; + ret = -EINVAL; + if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) + goto out_unlock; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + goto out_unlock; + /* Don't reflink realtime inodes */ if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) - return -EINVAL; + goto out_unlock; + + /* Don't share DAX file data for now. */ + if (IS_DAX(inode_in) || IS_DAX(inode_out)) + goto out_unlock; + + /* Are we going all the way to the end? */ + isize = i_size_read(inode_in); + if (isize == 0) { + ret = 0; + goto out_unlock; + } + + if (len == 0) + len = isize - pos_in; + + /* Ensure offsets don't wrap and the input is inside i_size */ + if (pos_in + len < pos_in || pos_out + len < pos_out || + pos_in + len > isize) + goto out_unlock; - if (flags & ~XFS_REFLINK_ALL) - return -EINVAL; + /* Don't allow dedupe past EOF in the dest file */ + if (is_dedupe) { + loff_t disize; - trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); + disize = i_size_read(inode_out); + if (pos_out >= disize || pos_out + len > disize) + goto out_unlock; + } + + /* If we're linking to EOF, continue to the block boundary. */ + if (pos_in + len == isize) + blen = ALIGN(isize, bs) - pos_in; + else + blen = len; + + /* Only reflink if we're aligned to block boundaries */ + if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || + !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) + goto out_unlock; + + /* Don't allow overlapped reflink within the same file */ + if (same_inode) { + if (pos_out + blen > pos_in && pos_out < pos_in + blen) + goto out_unlock; + } + + /* Wait for the completion of any pending IOs on both files */ + inode_dio_wait(inode_in); + if (!same_inode) + inode_dio_wait(inode_out); + + ret = filemap_write_and_wait_range(inode_in->i_mapping, + pos_in, pos_in + len - 1); + if (ret) + goto out_unlock; + + ret = filemap_write_and_wait_range(inode_out->i_mapping, + pos_out, pos_out + len - 1); + if (ret) + goto out_unlock; + + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); /* * Check that the extents are the same. */ - if (flags & XFS_REFLINK_DEDUPE) { - is_same = false; - error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest), - destoff, len, &is_same); - if (error) - goto out_error; + if (is_dedupe) { + bool is_same = false; + + ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out, + len, &is_same); + if (ret) + goto out_unlock; if (!is_same) { - error = -EBADE; - goto out_error; + ret = -EBADE; + goto out_unlock; } } - error = xfs_reflink_set_inode_flag(src, dest); - if (error) - goto out_error; + ret = xfs_reflink_set_inode_flag(src, dest); + if (ret) + goto out_unlock; /* * Invalidate the page cache so that we can clear any CoW mappings * in the destination file. */ - truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff, - PAGE_ALIGN(destoff + len) - 1); + truncate_inode_pages_range(&inode_out->i_data, pos_out, + PAGE_ALIGN(pos_out + len) - 1); - dfsbno = XFS_B_TO_FSBT(mp, destoff); - sfsbno = XFS_B_TO_FSBT(mp, srcoff); + dfsbno = XFS_B_TO_FSBT(mp, pos_out); + sfsbno = XFS_B_TO_FSBT(mp, pos_in); fsblen = XFS_B_TO_FSB(mp, len); - error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, - destoff + len); - if (error) - goto out_error; + ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, + pos_out + len); + if (ret) + goto out_unlock; /* * Carry the cowextsize hint from src to dest if we're sharing the @@ -1381,20 +1475,24 @@ xfs_reflink_remap_range( * has a cowextsize hint, and the destination file does not. */ cowextsize = 0; - if (srcoff == 0 && len == i_size_read(VFS_I(src)) && + if (pos_in == 0 && len == i_size_read(inode_in) && (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && - destoff == 0 && len >= i_size_read(VFS_I(dest)) && + pos_out == 0 && len >= i_size_read(inode_out) && !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) cowextsize = src->i_d.di_cowextsize; - error = xfs_reflink_update_dest(dest, destoff + len, cowextsize); - if (error) - goto out_error; + ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); -out_error: - if (error) - trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); - return error; +out_unlock: + xfs_iunlock(src, XFS_MMAPLOCK_EXCL); + xfs_iunlock(src, XFS_IOLOCK_EXCL); + if (src->i_ino != dest->i_ino) { + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); + xfs_iunlock(dest, XFS_IOLOCK_EXCL); + } + if (ret) + trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); + return ret; } /* diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 5dc3c8ac12aa..7ddd9f69560d 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -43,11 +43,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); -#define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */ -#define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE) -extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, - struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len, - unsigned int flags); +extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe); extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, struct xfs_trans **tpp); extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, -- cgit v1.2.3 From 62c5ac89de7d5eecdbaa94ca3d554b0bd41578b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:52:00 +1100 Subject: xfs: handle "raw" delayed extents xfs_reflink_trim_around_shared Delalloc extents in the extent list contain the number of reserved indirect blocks in their startblock value and don't use the magic DELAYSTARTBLOCK constant. Ensure that xfs_reflink_trim_around_shared handles them properly by checking for isnullstartblock(). Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 6592daa833a4..6c4c215634ec 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared( if (!xfs_is_reflink_inode(ip) || ISUNWRITTEN(irec) || irec->br_startblock == HOLESTARTBLOCK || - irec->br_startblock == DELAYSTARTBLOCK) { + irec->br_startblock == DELAYSTARTBLOCK || + isnullstartblock(irec->br_startblock)) { *shared = false; return 0; } -- cgit v1.2.3 From 3ba020befef030aaabbd5eb82a09f6ddf02a9542 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:53:50 +1100 Subject: xfs: optimize writes to reflink files Instead of reserving space as the first thing in write_begin move it past reading the extent in the data fork. That way we only have to read from the data fork once and can reuse that information for trimming the extent to the shared/unshared boundary. Additionally this allows to easily limit the actual write size to said boundary, and avoid a roundtrip on the ilock. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 56 ++++++++++++++------ fs/xfs/xfs_reflink.c | 142 +++++++++++++++++++++------------------------------ fs/xfs/xfs_reflink.h | 4 +- fs/xfs/xfs_trace.h | 3 +- 4 files changed, 100 insertions(+), 105 deletions(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1dabf2eb136a..436e109bb01e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay( xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, &got, &prev); if (!eof && got.br_startoff <= offset_fsb) { + if (xfs_is_reflink_inode(ip)) { + bool shared; + + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), + maxbytes_fsb); + xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); + error = xfs_reflink_reserve_cow(ip, &got, &shared); + if (error) + goto out_unlock; + } + trace_xfs_iomap_found(ip, offset, count, 0, &got); goto done; } @@ -961,19 +972,13 @@ xfs_file_iomap_begin( struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec imap; xfs_fileoff_t offset_fsb, end_fsb; - bool shared, trimmed; int nimaps = 1, error = 0; + bool shared = false, trimmed = false; unsigned lockmode; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { - error = xfs_reflink_reserve_cow_range(ip, offset, length); - if (error < 0) - return error; - } - if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { /* Reserve delalloc blocks for regular writeback. */ @@ -981,7 +986,16 @@ xfs_file_iomap_begin( iomap); } - lockmode = xfs_ilock_data_map_shared(ip); + /* + * COW writes will allocate delalloc space, so we need to make sure + * to take the lock exclusively here. + */ + if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { + lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, XFS_ILOCK_EXCL); + } else { + lockmode = xfs_ilock_data_map_shared(ip); + } ASSERT(offset <= mp->m_super->s_maxbytes); if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) @@ -991,19 +1005,24 @@ xfs_file_iomap_begin( error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, 0); - if (error) { - xfs_iunlock(ip, lockmode); - return error; - } + if (error) + goto out_unlock; - if (flags & (IOMAP_WRITE | IOMAP_ZERO | IOMAP_REPORT)) { + if (flags & IOMAP_REPORT) { /* Trim the mapping to the nearest shared extent boundary. */ error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); - if (error) { - xfs_iunlock(ip, lockmode); - return error; - } + if (error) + goto out_unlock; + } + + if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { + error = xfs_reflink_reserve_cow(ip, &imap, &shared); + if (error) + goto out_unlock; + + end_fsb = imap.br_startoff + imap.br_blockcount; + length = XFS_FSB_TO_B(mp, end_fsb) - offset; } if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { @@ -1042,6 +1061,9 @@ xfs_file_iomap_begin( if (shared) iomap->flags |= IOMAP_F_SHARED; return 0; +out_unlock: + xfs_iunlock(ip, lockmode); + return error; } static int diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 6c4c215634ec..9c477de3c1ac 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -228,50 +228,54 @@ xfs_reflink_trim_around_shared( } } -/* Create a CoW reservation for a range of blocks within a file. */ -static int -__xfs_reflink_reserve_cow( +/* + * Trim the passed in imap to the next shared/unshared extent boundary, and + * if imap->br_startoff points to a shared extent reserve space for it in the + * COW fork. In this case *shared is set to true, else to false. + * + * Note that imap will always contain the block numbers for the existing blocks + * in the data fork, as the upper layers need them for read-modify-write + * operations. + */ +int +xfs_reflink_reserve_cow( struct xfs_inode *ip, - xfs_fileoff_t *offset_fsb, - xfs_fileoff_t end_fsb, - bool *skipped) + struct xfs_bmbt_irec *imap, + bool *shared) { - struct xfs_bmbt_irec got, prev, imap; - xfs_fileoff_t orig_end_fsb; - int nimaps, eof = 0, error = 0; - bool shared = false, trimmed = false; + struct xfs_bmbt_irec got, prev; + xfs_fileoff_t end_fsb, orig_end_fsb; + int eof = 0, error = 0; + bool trimmed; xfs_extnum_t idx; xfs_extlen_t align; - /* Already reserved? Skip the refcount btree access. */ - xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx, + /* + * Search the COW fork extent list first. This serves two purposes: + * first this implement the speculative preallocation using cowextisze, + * so that we also unshared block adjacent to shared blocks instead + * of just the shared blocks themselves. Second the lookup in the + * extent list is generally faster than going out to the shared extent + * tree. + */ + xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx, &got, &prev); - if (!eof && got.br_startoff <= *offset_fsb) { - end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount; - trace_xfs_reflink_cow_found(ip, &got); - goto done; - } + if (!eof && got.br_startoff <= imap->br_startoff) { + trace_xfs_reflink_cow_found(ip, imap); + xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); - /* Read extent from the source file. */ - nimaps = 1; - error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, - &imap, &nimaps, 0); - if (error) - goto out_unlock; - ASSERT(nimaps == 1); + *shared = true; + return 0; + } /* Trim the mapping to the nearest shared extent boundary. */ - error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); + error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); if (error) - goto out_unlock; - - end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount; + return error; /* Not shared? Just report the (potentially capped) extent. */ - if (!shared) { - *skipped = true; - goto done; - } + if (!*shared) + return 0; /* * Fork all the shared blocks from our write offset until the end of @@ -279,72 +283,38 @@ __xfs_reflink_reserve_cow( */ error = xfs_qm_dqattach_locked(ip, 0); if (error) - goto out_unlock; + return error; + + end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount; align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); if (align) end_fsb = roundup_64(end_fsb, align); retry: - error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb, - end_fsb - *offset_fsb, &got, - &prev, &idx, eof); + error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, + end_fsb - imap->br_startoff, &got, &prev, &idx, eof); switch (error) { case 0: break; case -ENOSPC: case -EDQUOT: /* retry without any preallocation */ - trace_xfs_reflink_cow_enospc(ip, &imap); + trace_xfs_reflink_cow_enospc(ip, imap); if (end_fsb != orig_end_fsb) { end_fsb = orig_end_fsb; goto retry; } /*FALLTHRU*/ default: - goto out_unlock; + return error; } if (end_fsb != orig_end_fsb) xfs_inode_set_cowblocks_tag(ip); trace_xfs_reflink_cow_alloc(ip, &got); -done: - *offset_fsb = end_fsb; -out_unlock: - return error; -} - -/* Create a CoW reservation for part of a file. */ -int -xfs_reflink_reserve_cow_range( - struct xfs_inode *ip, - xfs_off_t offset, - xfs_off_t count) -{ - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb, end_fsb; - bool skipped = false; - int error = 0; - - trace_xfs_reflink_reserve_cow_range(ip, offset, count); - - offset_fsb = XFS_B_TO_FSBT(mp, offset); - end_fsb = XFS_B_TO_FSB(mp, offset + count); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - while (offset_fsb < end_fsb) { - error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb, - &skipped); - if (error) { - trace_xfs_reflink_reserve_cow_range_error(ip, error, - _RET_IP_); - break; - } - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - return error; + return 0; } /* Allocate all CoW reservations covering a range of blocks in a file. */ @@ -359,9 +329,8 @@ __xfs_reflink_allocate_cow( struct xfs_defer_ops dfops; struct xfs_trans *tp; xfs_fsblock_t first_block; - xfs_fileoff_t next_fsb; int nimaps = 1, error; - bool skipped = false; + bool shared; xfs_defer_init(&dfops, &first_block); @@ -372,33 +341,38 @@ __xfs_reflink_allocate_cow( xfs_ilock(ip, XFS_ILOCK_EXCL); - next_fsb = *offset_fsb; - error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped); + /* Read extent from the source file. */ + nimaps = 1; + error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, + &imap, &nimaps, 0); + if (error) + goto out_unlock; + ASSERT(nimaps == 1); + + error = xfs_reflink_reserve_cow(ip, &imap, &shared); if (error) goto out_trans_cancel; - if (skipped) { - *offset_fsb = next_fsb; + if (!shared) { + *offset_fsb = imap.br_startoff + imap.br_blockcount; goto out_trans_cancel; } xfs_trans_ijoin(tp, ip, 0); - error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb, + error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, XFS_BMAPI_COWFORK, &first_block, XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), &imap, &nimaps, &dfops); if (error) goto out_trans_cancel; - /* We might not have been able to map the whole delalloc extent */ - *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb); - error = xfs_defer_finish(&tp, &dfops, NULL); if (error) goto out_trans_cancel; error = xfs_trans_commit(tp); + *offset_fsb = imap.br_startoff + imap.br_blockcount; out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 7ddd9f69560d..fad11607c9ad 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno, extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); -extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, - xfs_off_t offset, xfs_off_t count); +extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, bool *shared); extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ad188d3a83f3..72f9f6b7a76a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); -DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range); +DEFINE_RW_EVENT(xfs_reflink_reserve_cow); DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); @@ -3358,7 +3358,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece); -DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); -- cgit v1.2.3 From fa5c836ca8eb5bad6316ddfc066acbc4e2485356 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:54:14 +1100 Subject: xfs: refactor xfs_bunmapi_cow Split out two helpers for deleting delayed or real extents from the COW fork. This allows to call them directly from xfs_reflink_cow_end_io once that function is refactored to iterate the extent tree. It will also allow to reuse the delalloc deletion from xfs_bunmapi in the future. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 374 ++++++++++++++++++++++++++++------------------- fs/xfs/libxfs/xfs_bmap.h | 5 + fs/xfs/xfs_reflink.c | 5 - 3 files changed, 225 insertions(+), 159 deletions(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 381e7659598c..d7ad51132f4f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4859,6 +4859,219 @@ xfs_bmap_split_indlen( return stolen; } +int +xfs_bmap_del_extent_delay( + struct xfs_inode *ip, + int whichfork, + xfs_extnum_t *idx, + struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *del) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_bmbt_irec new; + int64_t da_old, da_new, da_diff = 0; + xfs_fileoff_t del_endoff, got_endoff; + xfs_filblks_t got_indlen, new_indlen, stolen; + int error = 0, state = 0; + bool isrt; + + XFS_STATS_INC(mp, xs_del_exlist); + + isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); + del_endoff = del->br_startoff + del->br_blockcount; + got_endoff = got->br_startoff + got->br_blockcount; + da_old = startblockval(got->br_startblock); + da_new = 0; + + ASSERT(*idx >= 0); + ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(del->br_blockcount > 0); + ASSERT(got->br_startoff <= del->br_startoff); + ASSERT(got_endoff >= del_endoff); + + if (isrt) { + int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); + + do_div(rtexts, mp->m_sb.sb_rextsize); + xfs_mod_frextents(mp, rtexts); + } + + /* + * Update the inode delalloc counter now and wait to update the + * sb counters as we might have to borrow some blocks for the + * indirect block accounting. + */ + xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0, + isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + ip->i_delayed_blks -= del->br_blockcount; + + if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; + + if (got->br_startoff == del->br_startoff) + state |= BMAP_LEFT_CONTIG; + if (got_endoff == del_endoff) + state |= BMAP_RIGHT_CONTIG; + + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + /* + * Matches the whole extent. Delete the entry. + */ + xfs_iext_remove(ip, *idx, 1, state); + --*idx; + break; + case BMAP_LEFT_CONTIG: + /* + * Deleting the first part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_startoff = del_endoff; + got->br_blockcount -= del->br_blockcount; + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, + got->br_blockcount), da_old); + got->br_startblock = nullstartblock((int)da_new); + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case BMAP_RIGHT_CONTIG: + /* + * Deleting the last part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_blockcount = got->br_blockcount - del->br_blockcount; + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, + got->br_blockcount), da_old); + got->br_startblock = nullstartblock((int)da_new); + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case 0: + /* + * Deleting the middle of the extent. + * + * Distribute the original indlen reservation across the two new + * extents. Steal blocks from the deleted extent if necessary. + * Stealing blocks simply fudges the fdblocks accounting below. + * Warn if either of the new indlen reservations is zero as this + * can lead to delalloc problems. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + + got->br_blockcount = del->br_startoff - got->br_startoff; + got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount); + + new.br_blockcount = got_endoff - del_endoff; + new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount); + + WARN_ON_ONCE(!got_indlen || !new_indlen); + stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen, + del->br_blockcount); + + got->br_startblock = nullstartblock((int)got_indlen); + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_); + + new.br_startoff = del_endoff; + new.br_state = got->br_state; + new.br_startblock = nullstartblock((int)new_indlen); + + ++*idx; + xfs_iext_insert(ip, *idx, 1, &new, state); + + da_new = got_indlen + new_indlen - stolen; + del->br_blockcount -= stolen; + break; + } + + ASSERT(da_old >= da_new); + da_diff = da_old - da_new; + if (!isrt) + da_diff += del->br_blockcount; + if (da_diff) + xfs_mod_fdblocks(mp, da_diff, false); + return error; +} + +void +xfs_bmap_del_extent_cow( + struct xfs_inode *ip, + xfs_extnum_t *idx, + struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *del) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec new; + xfs_fileoff_t del_endoff, got_endoff; + int state = BMAP_COWFORK; + + XFS_STATS_INC(mp, xs_del_exlist); + + del_endoff = del->br_startoff + del->br_blockcount; + got_endoff = got->br_startoff + got->br_blockcount; + + ASSERT(*idx >= 0); + ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(del->br_blockcount > 0); + ASSERT(got->br_startoff <= del->br_startoff); + ASSERT(got_endoff >= del_endoff); + ASSERT(!isnullstartblock(got->br_startblock)); + + if (got->br_startoff == del->br_startoff) + state |= BMAP_LEFT_CONTIG; + if (got_endoff == del_endoff) + state |= BMAP_RIGHT_CONTIG; + + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + /* + * Matches the whole extent. Delete the entry. + */ + xfs_iext_remove(ip, *idx, 1, state); + --*idx; + break; + case BMAP_LEFT_CONTIG: + /* + * Deleting the first part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_startoff = del_endoff; + got->br_blockcount -= del->br_blockcount; + got->br_startblock = del->br_startblock + del->br_blockcount; + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case BMAP_RIGHT_CONTIG: + /* + * Deleting the last part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_blockcount -= del->br_blockcount; + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case 0: + /* + * Deleting the middle of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_blockcount = del->br_startoff - got->br_startoff; + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + new.br_startoff = del_endoff; + new.br_blockcount = got_endoff - del_endoff; + new.br_state = got->br_state; + new.br_startblock = del->br_startblock + del->br_blockcount; + + ++*idx; + xfs_iext_insert(ip, *idx, 1, &new, state); + break; + } +} + /* * Called by xfs_bmapi to update file extent records and the btree * after removing space (or undoing a delayed allocation). @@ -5207,167 +5420,20 @@ xfs_bunmapi_cow( struct xfs_inode *ip, struct xfs_bmbt_irec *del) { - xfs_filblks_t da_new; - xfs_filblks_t da_old; - xfs_fsblock_t del_endblock = 0; - xfs_fileoff_t del_endoff; - int delay; struct xfs_bmbt_rec_host *ep; - int error; struct xfs_bmbt_irec got; - xfs_fileoff_t got_endoff; - struct xfs_ifork *ifp; - struct xfs_mount *mp; - xfs_filblks_t nblks; struct xfs_bmbt_irec new; - /* REFERENCED */ - uint qfield; - xfs_filblks_t temp; - xfs_filblks_t temp2; - int state = BMAP_COWFORK; int eof; xfs_extnum_t eidx; - mp = ip->i_mount; - XFS_STATS_INC(mp, xs_del_exlist); - ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof, - &eidx, &got, &new); - - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - ASSERT((eidx >= 0) && (eidx < ifp->if_bytes / - (uint)sizeof(xfs_bmbt_rec_t))); - ASSERT(del->br_blockcount > 0); - ASSERT(got.br_startoff <= del->br_startoff); - del_endoff = del->br_startoff + del->br_blockcount; - got_endoff = got.br_startoff + got.br_blockcount; - ASSERT(got_endoff >= del_endoff); - delay = isnullstartblock(got.br_startblock); - ASSERT(isnullstartblock(del->br_startblock) == delay); - qfield = 0; - error = 0; - /* - * If deleting a real allocation, must free up the disk space. - */ - if (!delay) { - nblks = del->br_blockcount; - qfield = XFS_TRANS_DQ_BCOUNT; - /* - * Set up del_endblock and cur for later. - */ - del_endblock = del->br_startblock + del->br_blockcount; - da_old = da_new = 0; - } else { - da_old = startblockval(got.br_startblock); - da_new = 0; - nblks = 0; - } - qfield = qfield; - nblks = nblks; - - /* - * Set flag value to use in switch statement. - * Left-contig is 2, right-contig is 1. - */ - switch (((got.br_startoff == del->br_startoff) << 1) | - (got_endoff == del_endoff)) { - case 3: - /* - * Matches the whole extent. Delete the entry. - */ - xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK); - --eidx; - break; - - case 2: - /* - * Deleting the first part of the extent. - */ - trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); - xfs_bmbt_set_startoff(ep, del_endoff); - temp = got.br_blockcount - del->br_blockcount; - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - da_new = temp; - break; - } - xfs_bmbt_set_startblock(ep, del_endblock); - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - break; - - case 1: - /* - * Deleting the last part of the extent. - */ - temp = got.br_blockcount - del->br_blockcount; - trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - da_new = temp; - break; - } - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - break; - - case 0: - /* - * Deleting the middle of the extent. - */ - temp = del->br_startoff - got.br_startoff; - trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - new.br_startoff = del_endoff; - temp2 = got_endoff - del_endoff; - new.br_blockcount = temp2; - new.br_state = got.br_state; - if (!delay) { - new.br_startblock = del_endblock; - } else { - temp = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - temp2 = xfs_bmap_worst_indlen(ip, temp2); - new.br_startblock = nullstartblock((int)temp2); - da_new = temp + temp2; - while (da_new > da_old) { - if (temp) { - temp--; - da_new--; - xfs_bmbt_set_startblock(ep, - nullstartblock((int)temp)); - } - if (da_new == da_old) - break; - if (temp2) { - temp2--; - da_new--; - new.br_startblock = - nullstartblock((int)temp2); - } - } - } - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - xfs_iext_insert(ip, eidx + 1, 1, &new, state); - ++eidx; - break; - } - - /* - * Account for change in delayed indirect blocks. - * Nothing to do for disk quota accounting here. - */ - ASSERT(da_old >= da_new); - if (da_old > da_new) - xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false); - - return error; + &eidx, &got, &new); + ASSERT(ep); + if (isnullstartblock(got.br_startblock)) + xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, &eidx, &got, del); + else + xfs_bmap_del_extent_cow(ip, &eidx, &got, del); + return 0; } /* diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index eb86af0c988b..5f18248cbac2 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -224,6 +224,11 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extnum_t nexts, xfs_fsblock_t *firstblock, struct xfs_defer_ops *dfops, int *done); int xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del); +int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, + xfs_extnum_t *idx, struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *del); +void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx, + struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, xfs_extnum_t num); uint xfs_default_attroffset(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 9c477de3c1ac..09bd5dcd90cd 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -534,11 +534,6 @@ xfs_reflink_cancel_cow_blocks( trace_xfs_reflink_cancel_cow(ip, &irec); if (irec.br_startblock == DELAYSTARTBLOCK) { - /* Free a delayed allocation. */ - xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount, - false); - ip->i_delayed_blks -= irec.br_blockcount; - /* Remove the mapping from the CoW fork. */ error = xfs_bunmapi_cow(ip, &irec); if (error) -- cgit v1.2.3 From 3e0ee78f7a5a8ed43b129e9e4c5c0ff10c71df74 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:54:31 +1100 Subject: xfs: optimize xfs_reflink_cancel_cow_blocks Rewrite xfs_reflink_cancel_cow_blocks so that we only do a search for the first extent in the extent list and then iterate over the remaining extents using the extent index, passing the extent we operate on directly to xfs_bmap_del_extent_delay or xfs_bmap_del_extent_cow instead of going through xfs_bunmapi and doing yet another extent list lookup. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 51 +++++++++++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 28 deletions(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 09bd5dcd90cd..1e9c589d22cc 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -511,53 +511,49 @@ xfs_reflink_cancel_cow_blocks( xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb) { - struct xfs_bmbt_irec irec; - xfs_filblks_t count_fsb; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec got, prev, del; + xfs_extnum_t idx; xfs_fsblock_t firstfsb; struct xfs_defer_ops dfops; - int error = 0; - int nimaps; + int error = 0, eof = 0; if (!xfs_is_reflink_inode(ip)) return 0; - /* Go find the old extent in the CoW fork. */ - while (offset_fsb < end_fsb) { - nimaps = 1; - count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); - error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec, - &nimaps, XFS_BMAPI_COWFORK); - if (error) - break; - ASSERT(nimaps == 1); + xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx, + &got, &prev); + if (eof) + return 0; - trace_xfs_reflink_cancel_cow(ip, &irec); + while (got.br_startoff < end_fsb) { + del = got; + xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); + trace_xfs_reflink_cancel_cow(ip, &del); - if (irec.br_startblock == DELAYSTARTBLOCK) { - /* Remove the mapping from the CoW fork. */ - error = xfs_bunmapi_cow(ip, &irec); + if (isnullstartblock(del.br_startblock)) { + error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, + &idx, &got, &del); if (error) break; - } else if (irec.br_startblock == HOLESTARTBLOCK) { - /* empty */ } else { xfs_trans_ijoin(*tpp, ip, 0); xfs_defer_init(&dfops, &firstfsb); /* Free the CoW orphan record. */ error = xfs_refcount_free_cow_extent(ip->i_mount, - &dfops, irec.br_startblock, - irec.br_blockcount); + &dfops, del.br_startblock, + del.br_blockcount); if (error) break; xfs_bmap_add_free(ip->i_mount, &dfops, - irec.br_startblock, irec.br_blockcount, + del.br_startblock, del.br_blockcount, NULL); /* Update quota accounting */ xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, - -(long)irec.br_blockcount); + -(long)del.br_blockcount); /* Roll the transaction */ error = xfs_defer_finish(tpp, &dfops, ip); @@ -567,13 +563,12 @@ xfs_reflink_cancel_cow_blocks( } /* Remove the mapping from the CoW fork. */ - error = xfs_bunmapi_cow(ip, &irec); - if (error) - break; + xfs_bmap_del_extent_cow(ip, &idx, &got, &del); } - /* Roll on... */ - offset_fsb = irec.br_startoff + irec.br_blockcount; + if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)) + return 0; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); } return error; -- cgit v1.2.3 From c1112b6e626637ec09319883b63e705a931c398b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:54:45 +1100 Subject: xfs: optimize xfs_reflink_end_cow Instead of doing a full extent list search for each extent that is to be deleted using xfs_bmapi_read and then doing another one inside of xfs_bunmapi_cow use the same scheme that xfs_bumapi uses: look up the last extent to be deleted and then use the extent index to walk downward until we are outside the range to be deleted. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 119 ++++++++++++++++++++++++--------------------------- fs/xfs/xfs_trace.h | 1 - 2 files changed, 56 insertions(+), 64 deletions(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 1e9c589d22cc..cd308f119e20 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -633,25 +633,26 @@ xfs_reflink_end_cow( xfs_off_t offset, xfs_off_t count) { - struct xfs_bmbt_irec irec; - struct xfs_bmbt_irec uirec; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec got, prev, del; struct xfs_trans *tp; xfs_fileoff_t offset_fsb; xfs_fileoff_t end_fsb; - xfs_filblks_t count_fsb; xfs_fsblock_t firstfsb; struct xfs_defer_ops dfops; - int error; + int error, eof = 0; unsigned int resblks; - xfs_filblks_t ilen; xfs_filblks_t rlen; - int nimaps; + xfs_extnum_t idx; trace_xfs_reflink_end_cow(ip, offset, count); + /* No COW extents? That's easy! */ + if (ifp->if_bytes == 0) + return 0; + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); - count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); /* Start a rolling transaction to switch the mappings */ resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); @@ -663,72 +664,65 @@ xfs_reflink_end_cow( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - /* Go find the old extent in the CoW fork. */ - while (offset_fsb < end_fsb) { - /* Read extent from the source file */ - nimaps = 1; - count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); - error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec, - &nimaps, XFS_BMAPI_COWFORK); - if (error) - goto out_cancel; - ASSERT(nimaps == 1); + xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx, + &got, &prev); - ASSERT(irec.br_startblock != DELAYSTARTBLOCK); - trace_xfs_reflink_cow_remap(ip, &irec); + /* If there is a hole at end_fsb - 1 go to the previous extent */ + if (eof || got.br_startoff > end_fsb) { + ASSERT(idx > 0); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got); + } - /* - * We can have a hole in the CoW fork if part of a directio - * write is CoW but part of it isn't. - */ - rlen = ilen = irec.br_blockcount; - if (irec.br_startblock == HOLESTARTBLOCK) + /* Walk backwards until we're out of the I/O range... */ + while (got.br_startoff + got.br_blockcount > offset_fsb) { + del = got; + xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); + + /* Extent delete may have bumped idx forward */ + if (!del.br_blockcount) { + idx--; goto next_extent; + } + + ASSERT(!isnullstartblock(got.br_startblock)); /* Unmap the old blocks in the data fork. */ - while (rlen) { - xfs_defer_init(&dfops, &firstfsb); - error = __xfs_bunmapi(tp, ip, irec.br_startoff, - &rlen, 0, 1, &firstfsb, &dfops); - if (error) - goto out_defer; - - /* - * Trim the extent to whatever got unmapped. - * Remember, bunmapi works backwards. - */ - uirec.br_startblock = irec.br_startblock + rlen; - uirec.br_startoff = irec.br_startoff + rlen; - uirec.br_blockcount = irec.br_blockcount - rlen; - irec.br_blockcount = rlen; - trace_xfs_reflink_cow_remap_piece(ip, &uirec); + xfs_defer_init(&dfops, &firstfsb); + rlen = del.br_blockcount; + error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1, + &firstfsb, &dfops); + if (error) + goto out_defer; - /* Free the CoW orphan record. */ - error = xfs_refcount_free_cow_extent(tp->t_mountp, - &dfops, uirec.br_startblock, - uirec.br_blockcount); - if (error) - goto out_defer; + /* Trim the extent to whatever got unmapped. */ + if (rlen) { + xfs_trim_extent(&del, del.br_startoff + rlen, + del.br_blockcount - rlen); + } + trace_xfs_reflink_cow_remap(ip, &del); - /* Map the new blocks into the data fork. */ - error = xfs_bmap_map_extent(tp->t_mountp, &dfops, - ip, &uirec); - if (error) - goto out_defer; + /* Free the CoW orphan record. */ + error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops, + del.br_startblock, del.br_blockcount); + if (error) + goto out_defer; - /* Remove the mapping from the CoW fork. */ - error = xfs_bunmapi_cow(ip, &uirec); - if (error) - goto out_defer; + /* Map the new blocks into the data fork. */ + error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del); + if (error) + goto out_defer; - error = xfs_defer_finish(&tp, &dfops, ip); - if (error) - goto out_defer; - } + /* Remove the mapping from the CoW fork. */ + xfs_bmap_del_extent_cow(ip, &idx, &got, &del); + + error = xfs_defer_finish(&tp, &dfops, ip); + if (error) + goto out_defer; next_extent: - /* Roll on... */ - offset_fsb = irec.br_startoff + ilen; + if (idx < 0) + break; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); } error = xfs_trans_commit(tp); @@ -739,7 +733,6 @@ next_extent: out_defer: xfs_defer_cancel(&dfops); -out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); out: diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 72f9f6b7a76a..0907752be62d 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3356,7 +3356,6 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); -DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece); DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); -- cgit v1.2.3 From c17a8ef43d6b80ed3519b828c37d18645445949f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 24 Oct 2016 14:21:08 +1100 Subject: xfs: clear cowblocks tag when cow fork is emptied The background cowblocks scan job takes care of scanning for inodes with potentially lingering blocks in the cow fork and clearing them out. If the background scanner reclaims the cow fork blocks, however, it doesn't immediately clear the cowblocks tag from the inode. Instead, the inode remains tagged until the background scanner comes around again, discovers the inode cow fork has no blocks, clears the tag and fires the trace_xfs_inode_free_cowblocks_invalid() tracepoint to indicate that the inode may have been incorrectly tagged. This is not a major functional problem as the tag is ultimately cleared. Nonetheless, clear the tag when an inode cow fork is explicitly emptied to avoid the extra round trip through the background scanner and spurious "invalid" tracepoint. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_reflink.c') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index cd308f119e20..a279b4e7f5fe 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -567,10 +567,14 @@ xfs_reflink_cancel_cow_blocks( } if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)) - return 0; + break; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); } + /* clear tag if cow fork is emptied */ + if (!ifp->if_bytes) + xfs_inode_clear_cowblocks_tag(ip); + return error; } -- cgit v1.2.3