xfs: stop artificially limiting the length of bunmap calls

In commit e1a4e37cc7b6, we clamped the length of bunmapi calls on the data forks of shared files to avoid two failure scenarios: one where the extent being unmapped is so sparsely shared that we exceed the transaction reservation with the sheer number of refcount btree updates and EFI intent items; and the other where we attach so many deferred updates to the transaction that we pin the log tail and later the log head meets the tail, causing the log to livelock. We avoid triggering the first problem by tracking the number of ops in the refcount btree cursor and forcing a requeue of the refcount intent item any time we think that we might be close to overflowing. This has been baked into XFS since before the original e1a4 patch. A recent patchset fixed the second problem by changing the deferred ops code to finish all the work items created by each round of trying to complete a refcount intent item, which eliminates the long chains of deferred items (27dad); and causing long-running transactions to relog their intent log items when space in the log gets low (74f4d). Because this clamp affects /any/ unmapping request regardless of the sharing factors of the component blocks, it degrades the performance of all large unmapping requests -- whereas with an unshared file we can unmap millions of blocks in one go, shared files are limited to unmapping a few thousand blocks at a time, which causes the upper level code to spin in a bunmapi loop even if it wasn't needed. This also eliminates one more place where log recovery behavior can differ from online behavior, because bunmapi operations no longer need to requeue. Partial-revert-of: e1a4e37cc7b6 ("xfs: try to avoid blowing out the transaction reservation when bunmaping a shared extent") Depends: 27dada070d59 ("xfs: change the order in which child and parent defer ops ar finished") Depends: 74f4d6a1e065 ("xfs: only relog deferred intent items if free space in the log gets low") Signed-off-by: Darrick J. Wong <djwong@kernel.org>
author: Darrick J. Wong <djwong@kernel.org> 2021-09-01 10:40:13 -0700
committer: Darrick J. Wong <djwong@kernel.org> 2021-09-17 18:54:48 -0700
commit: 298c3699d1a40ee7691cd2698c1457b62fff1699 (patch)
tree: d679e5d7c89482c395330041494f4d18848a0610
parent: a17640128f0b0192d58cbd1b918475b9ff67b9b3 (diff)
3 files changed, 5 insertions, 30 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index b48230f1a361..447aa3829ed8 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5349,7 +5349,6 @@ __xfs_bunmapi(
 	int			whichfork;	/* data or attribute fork */
 	xfs_fsblock_t		sum;
 	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
-	xfs_fileoff_t		max_len;
 	xfs_fileoff_t		end;
 	struct xfs_iext_cursor	icur;
 	bool			done = false;
@@ -5368,16 +5367,6 @@ __xfs_bunmapi(
 	ASSERT(len > 0);
 	ASSERT(nexts >= 0);
 
-	/*
-	 * Guesstimate how many blocks we can unmap without running the risk of
-	 * blowing out the transaction with a mix of EFIs and reflink
-	 * adjustments.
-	 */
-	if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
-		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
-	else
-		max_len = len;
-
 	error = xfs_iread_extents(tp, ip, whichfork);
 	if (error)
 		return error;
@@ -5416,7 +5405,7 @@ __xfs_bunmapi(
 
 	extno = 0;
 	while (end != (xfs_fileoff_t)-1 && end >= start &&
-	       (nexts == 0 || extno < nexts) && max_len > 0) {
+	       (nexts == 0 || extno < nexts)) {
 		/*
 		 * Is the found extent after a hole in which end lives?
 		 * Just back up to the previous extent, if so.
@@ -5450,14 +5439,6 @@ __xfs_bunmapi(
 		if (del.br_startoff + del.br_blockcount > end + 1)
 			del.br_blockcount = end + 1 - del.br_startoff;
 
-		/* How much can we safely unmap? */
-		if (max_len < del.br_blockcount) {
-			del.br_startoff += del.br_blockcount - max_len;
-			if (!wasdel)
-				del.br_startblock += del.br_blockcount - max_len;
-			del.br_blockcount = max_len;
-		}
-
 		if (!isrt)
 			goto delete;
 
@@ -5593,7 +5574,6 @@ delete:
 		if (error)
 			goto error0;
 
-		max_len -= del.br_blockcount;
 		end = del.br_startoff - 1;
 nodelete:
 		/*
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 64aec6279ad8..f8bc10728e93 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -959,6 +959,7 @@ xfs_refcount_adjust_extents(
 			 * Either cover the hole (increment) or
 			 * delete the range (decrement).
 			 */
+			cur->bc_ag.refc.nr_ops++;
 			if (tmp.rc_refcount) {
 				error = xfs_refcount_insert(cur, &tmp,
 						&found_tmp);
@@ -969,7 +970,6 @@ xfs_refcount_adjust_extents(
 					error = -EFSCORRUPTED;
 					goto out_error;
 				}
-				cur->bc_ag.refc.nr_ops++;
 			} else {
 				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
 						cur->bc_ag.pag->pag_agno,
@@ -1000,11 +1000,11 @@ xfs_refcount_adjust_extents(
 		ext.rc_refcount += adj;
 		trace_xfs_refcount_modify_extent(cur->bc_mp,
 				cur->bc_ag.pag->pag_agno, &ext);
+		cur->bc_ag.refc.nr_ops++;
 		if (ext.rc_refcount > 1) {
 			error = xfs_refcount_update(cur, &ext);
 			if (error)
 				goto out_error;
-			cur->bc_ag.refc.nr_ops++;
 		} else if (ext.rc_refcount == 1) {
 			error = xfs_refcount_delete(cur, &found_rec);
 			if (error)
@@ -1013,7 +1013,6 @@ xfs_refcount_adjust_extents(
 				error = -EFSCORRUPTED;
 				goto out_error;
 			}
-			cur->bc_ag.refc.nr_ops++;
 			goto advloop;
 		} else {
 			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 02cb3aa405be..b7e35b761499 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -66,15 +66,11 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
  * reservation and crash the fs.  Each record adds 12 bytes to the
  * log (plus any key updates) so we'll conservatively assume 32 bytes
  * per record.  We must also leave space for btree splits on both ends
- * of the range and space for the CUD and a new CUI.
+ * of the range and space for the CUD and a new CUI.  Each EFI that we
+ * attach to the transaction also consumes ~32 bytes.
  */
 #define XFS_REFCOUNT_ITEM_OVERHEAD	32
 
-static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
-{
-	return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
-}
-
 extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
 		xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
 union xfs_btree_rec;
author	Darrick J. Wong <djwong@kernel.org>	2021-09-01 10:40:13 -0700
committer	Darrick J. Wong <djwong@kernel.org>	2021-09-17 18:54:48 -0700
commit	298c3699d1a40ee7691cd2698c1457b62fff1699 (patch)
tree	d679e5d7c89482c395330041494f4d18848a0610
parent	a17640128f0b0192d58cbd1b918475b9ff67b9b3 (diff)