summary refs log tree commit diff
diff options
context:
space:
mode:
author Dave Chinner <dchinner@redhat.com> 2021-09-01 11:24:51 -0700
committer Darrick J. Wong <djwong@kernel.org> 2021-09-17 18:55:28 -0700
commit 45494d031f92d7aee3396fc35ea8e74e2b59fd68 (patch)
tree 88fdfa26b0258f5f645b4af40b19df83d3ba18da
parent 146d6bb89e4f7b33a3b1c1ac0f7621b6cdc7e004 (diff)
xfs: don't free EOF blocks on read close
When we have a workload that does open/read/close in parallel with other
allocation, the file becomes rapidly fragmented. This is due to close()
calling xfs_release() and removing the speculative preallocation beyond
EOF.

The existing open/*/close heuristic in xfs_release() does not catch this
as a sync writer does not leave delayed allocation blocks allocated on
the inode for later writeback that can be detected in xfs_release() and
hence XFS_IDIRTY_RELEASE never gets set.

In xfs_file_release(), we know more about the released file context, and
so we need to communicate some of the details to xfs_release() so it can
do the right thing here and skip EOF block truncation. This defers the
EOF block cleanup for synchronous write contexts to the background EOF
block cleaner which will clean up within a few minutes.

Before:

Test 1: sync write fragmentation counts

/mnt/scratch/file.0: 919
/mnt/scratch/file.1: 916
/mnt/scratch/file.2: 919
/mnt/scratch/file.3: 920
/mnt/scratch/file.4: 920
/mnt/scratch/file.5: 921
/mnt/scratch/file.6: 916
/mnt/scratch/file.7: 918

After:

Test 1: sync write fragmentation counts

/mnt/scratch/file.0: 24
/mnt/scratch/file.1: 24
/mnt/scratch/file.2: 11
/mnt/scratch/file.3: 24
/mnt/scratch/file.4: 3
/mnt/scratch/file.5: 24
/mnt/scratch/file.6: 24
/mnt/scratch/file.7: 23

Signed-off-by: Dave Chinner <dchinner@redhat.com>
[darrick: wordsmithing, fix commit message]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- fs/xfs/xfs_file.c 14
-rw-r--r-- fs/xfs/xfs_inode.c 9
-rw-r--r-- fs/xfs/xfs_inode.h 2
3 files changed, 18 insertions, 7 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index cea3c14e3ba0..11f57c73f748 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1413,12 +1413,22 @@ xfs_dir_open(
return error;
}
+/*
+ * When we release the file, we don't want it to trim EOF blocks if it is a
+ * readonly context. This avoids open/read/close workloads from removing
+ * EOF blocks that other writers depend upon to reduce fragmentation.
+ */
STATIC int
xfs_file_release(
struct inode *inode,
- struct file *filp)
+ struct file *file)
{
- return xfs_release(XFS_I(inode));
+ bool free_eof_blocks = true;
+
+ if ((file->f_mode & (FMODE_WRITE | FMODE_READ)) == FMODE_READ)
+ free_eof_blocks = false;
+
+ return xfs_release(XFS_I(inode), free_eof_blocks);
}
STATIC int
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 978a5ef43239..3b3e7390e66d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1198,10 +1198,11 @@ out:
int
xfs_release(
- xfs_inode_t *ip)
+ struct xfs_inode *ip,
+ bool want_free_eofblocks)
{
- xfs_mount_t *mp = ip->i_mount;
- int error = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ int error = 0;
if (!S_ISREG(VFS_I(ip)->i_mode) || (VFS_I(ip)->i_mode == 0))
return 0;
@@ -1243,7 +1244,7 @@ xfs_release(
* another chance to drop them once the last reference to the inode is
* dropped, so we'll never leak blocks permanently.
*/
- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
+ if (!want_free_eofblocks || !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
return 0;
if (xfs_can_free_eofblocks(ip, false)) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index d2f72c536627..7801da45c20e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -416,7 +416,7 @@ enum layout_break_reason {
#define XFS_INHERIT_GID(pip) \
(xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID))
-int xfs_release(struct xfs_inode *ip);
+int xfs_release(struct xfs_inode *ip, bool can_free_eofblocks);
void xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);