summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Darrick J. Wong <djwong@kernel.org> 2021-09-01 11:15:53 -0700
committer: Darrick J. Wong <djwong@kernel.org> 2021-10-22 16:40:56 -0700
commit: 230372cfd627f2e9f11da2963693c8ebee3d6ba5 (patch)
tree: 88f2dda3bbe10528534ae13ef0502e248653d6bd
parent: b23073c8965c9a39c9ee68278d9a1c0121865963 (diff)
xfs: compute the maximum height of the rmap btree when reflink enabled
Instead of assuming that the hardcoded XFS_BTREE_MAXLEVELS value is big
enough to handle the maximally tall rmap btree when all blocks are in
use and maximally shared, let's compute the maximum height assuming the
rmapbt consumes as many blocks as possible.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Chandan Babu R <chandan.babu@oracle.com>
-rw-r--r-- fs/xfs/libxfs/xfs_btree.c | 34
-rw-r--r-- fs/xfs/libxfs/xfs_btree.h | 2
-rw-r--r-- fs/xfs/libxfs/xfs_rmap_btree.c | 40
-rw-r--r-- fs/xfs/libxfs/xfs_rmap_btree.h | 2
-rw-r--r-- fs/xfs/libxfs/xfs_trans_resv.c | 17
-rw-r--r-- fs/xfs/libxfs/xfs_trans_space.h | 7
-rw-r--r-- fs/xfs/xfs_mount.c | 2
7 files changed, 81 insertions, 23 deletions
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index d754728f370b..370f4ecb1973 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4770,6 +4770,40 @@ xfs_btree_compute_maxlevels(
}
/*
+ * Compute the maximum height of a btree that is allowed to consume up to the
+ * given number of blocks.
+ */
+unsigned int
+xfs_btree_compute_maxlevels_size(
+ unsigned long long max_btblocks,
+ unsigned int leaf_mnr)
+{
+ unsigned long long leaf_blocks = leaf_mnr;
+ unsigned long long blocks_left;
+ unsigned int maxlevels;
+
+ if (max_btblocks < 1)
+ return 0;
+
+ /*
+ * The loop increments maxlevels as long as there would be enough
+ * blocks left in the reservation to handle each node block at the
+ * current level pointing to the minimum possible number of leaf blocks
+ * at the next level down. We start the loop assuming a single-level
+ * btree consuming one block.
+ */
+ maxlevels = 1;
+ blocks_left = max_btblocks - 1;
+ while (leaf_blocks < blocks_left) {
+ maxlevels++;
+ blocks_left -= leaf_blocks;
+ leaf_blocks *= leaf_mnr;
+ }
+
+ return maxlevels;
+}
+
+/*
* Query a regular btree for all records overlapping a given interval.
* Start with a LE lookup of the key of low_rec and return all records
* until we find a record with a key greater than the key of high_rec.
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 0ea8374e7b29..a3be6a9a7e09 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -488,6 +488,8 @@ xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp,
unsigned int max_recs);
uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len);
+unsigned int xfs_btree_compute_maxlevels_size(unsigned long long max_btblocks,
+ unsigned int leaf_mnr);
unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
/*
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 167daffa7022..ed2670158d6c 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -545,30 +545,32 @@ xfs_rmapbt_maxrecs(
}
/* Compute the maximum height of an rmap btree. */
-void
+unsigned int
xfs_rmapbt_compute_maxlevels(
- struct xfs_mount *mp)
+ struct xfs_mount *mp)
{
+ if (!xfs_has_reflink(mp)) {
+ /*
+ * If there's no block sharing, compute the maximum rmapbt
+ * height assuming one rmap record per AG block.
+ */
+ return xfs_btree_compute_maxlevels(mp->m_rmap_mnr,
+ mp->m_sb.sb_agblocks);
+ }
+
/*
- * On a non-reflink filesystem, the maximum number of rmap
- * records is the number of blocks in the AG, hence the max
- * rmapbt height is log_$maxrecs($agblocks). However, with
- * reflink each AG block can have up to 2^32 (per the refcount
- * record format) owners, which means that theoretically we
- * could face up to 2^64 rmap records.
+ * Compute the asymptotic maxlevels for an rmapbt on a reflink fs.
*
- * That effectively means that the max rmapbt height must be
- * XFS_BTREE_MAXLEVELS. "Fortunately" we'll run out of AG
- * blocks to feed the rmapbt long before the rmapbt reaches
- * maximum height. The reflink code uses ag_resv_critical to
- * disallow reflinking when less than 10% of the per-AG metadata
- * block reservation since the fallback is a regular file copy.
+ * On a reflink filesystem, each AG block can have up to 2^32 (per the
+ * refcount record format) owners, which means that theoretically we
+ * could face up to 2^64 rmap records. However, we're likely to run
+ * out of blocks in the AG long before that happens, which means that
+ * we must compute the max height based on what the btree will look
+ * like if it consumes almost all the blocks in the AG due to maximal
+ * sharing factor.
*/
- if (xfs_has_reflink(mp))
- mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS;
- else
- mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
- mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
+ return xfs_btree_compute_maxlevels_size(mp->m_sb.sb_agblocks,
+ mp->m_rmap_mnr[1]);
}
/* Calculate the refcount btree size for some records. */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index 9b58efc718f7..d8c7253f1a7a 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -50,7 +50,7 @@ void xfs_rmapbt_commit_staged_btree(struct xfs_btree_cur *cur,
struct xfs_trans *tp, struct xfs_buf *agbp);
unsigned int xfs_rmapbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen,
bool leaf);
-extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
+unsigned int xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp,
unsigned long long len);
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 8086f043feb7..b3de538ea7ce 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -1075,24 +1075,34 @@ xfs_trans_resv_calc(
/*
* Compute an alternate set of log reservation sizes for use exclusively with
- * minimum log size calculations.
+ * minimum log size calculations. Caller must ensure that no other
+ * transactions are running.
*/
void
xfs_trans_resv_calc_logsize(
struct xfs_mount *mp,
struct xfs_trans_resv *resp)
{
+ unsigned int rmap_maxlevels = mp->m_rmap_maxlevels;
+
ASSERT(resp != M_RES(mp));
/*
* The metadata directory tree feature drops the oversized log
- * reservations introduced by reflink.
+ * reservations introduced by reflink and rmap.
*/
if (xfs_has_metadir(mp)) {
xfs_trans_resv_calc(mp, resp);
return;
}
+ /*
+ * In the early days of rmap+reflink, we hardcoded the rmap maxlevels
+ * to 9 even if the AG size was smaller.
+ */
+ if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp))
+ mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS;
+
xfs_trans_resv_calc(mp, resp);
if (xfs_has_reflink(mp)) {
@@ -1128,4 +1138,7 @@ xfs_trans_resv_calc_logsize(
resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
}
+
+ /* Put everything back the way it was. This goes at the end. */
+ mp->m_rmap_maxlevels = rmap_maxlevels;
}
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index 50332be34388..440c9c390b86 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -17,6 +17,13 @@
/* Adding one rmap could split every level up to the top of the tree. */
#define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels)
+/*
+ * Note that we historically set m_rmap_maxlevels to 9 when reflink was
+ * enabled, so we must preserve this behavior to avoid changing the transaction
+ * space reservations.
+ */
+#define XFS_OLD_REFLINK_RMAP_MAXLEVELS (9)
+
/* Blocks we might need to add "b" rmaps to a tree. */
#define XFS_NRMAPADD_SPACE_RES(mp, b)\
(((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c75219c8ee66..ac8e1b1efb14 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -664,7 +664,7 @@ xfs_mountfs(
xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
xfs_mount_setup_inode_geom(mp);
- xfs_rmapbt_compute_maxlevels(mp);
+ mp->m_rmap_maxlevels = xfs_rmapbt_compute_maxlevels(mp);
xfs_refcountbt_compute_maxlevels(mp);
/*