summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-09-01 11:18:02 -0700
committerDarrick J. Wong <djwong@kernel.org>2021-10-22 16:41:05 -0700
commit152c188d84f14c7e1681c30a8f96f231aa448dc1 (patch)
tree647f268e3e55c4d4d78f6d9b077d3fae865357ef
parent67a47d42707371e7eae75a257ea5f252f3ae9689 (diff)
xfs: create a noalloc mode for allocation groups
Create a new noalloc state for the per-AG structure that will disable block allocation in this AG. We accomplish this by subtracting from fdblocks all the free blocks in this AG, hiding those blocks from the allocator, and preventing freed blocks from updating fdblocks until we're ready to lift noalloc mode. Note that we reduce the free block count of the filesystem so that we can prevent transactions from entering the allocator looking for "free" space that we've turned off incore. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/libxfs/xfs_ag.c60
-rw-r--r--fs/xfs/libxfs/xfs_ag.h5
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c26
-rw-r--r--fs/xfs/scrub/fscounters.c3
-rw-r--r--fs/xfs/xfs_fsops.c10
-rw-r--r--fs/xfs/xfs_super.c1
-rw-r--r--fs/xfs/xfs_trace.h46
7 files changed, 146 insertions, 5 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index d0035e156a25..65b35d870fac 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -985,3 +985,63 @@ out_agi:
xfs_buf_relse(agi_bp);
return error;
}
+
+/*
+ * How many blocks does this AG contribute to fdblocks?
+ *
+ * The answer is computed purely from incore perag state: the sum of
+ * pagf_freeblks, pagf_flcount and pagf_btreeblks, minus the metadata
+ * reservation (pag_meta_resv.ar_reserved) and the rmapbt reservation
+ * (pag_rmapbt_resv.ar_orig_reserved).  The perag's AGF counters must have
+ * been initialised (pagf_init) before calling this.
+ *
+ * NOTE(review): the arithmetic is carried out in xfs_extlen_t; confirm that
+ * the two reservations can never exceed the sum, otherwise the result wraps.
+ */
+xfs_extlen_t
+xfs_ag_fdblocks(
+ struct xfs_perag *pag)
+{
+ xfs_extlen_t ret;
+
+ ASSERT(pag->pagf_init != 0);
+
+ ret = pag->pagf_freeblks + pag->pagf_flcount + pag->pagf_btreeblks;
+ ret -= pag->pag_meta_resv.ar_reserved;
+ ret -= pag->pag_rmapbt_resv.ar_orig_reserved;
+ return ret;
+}
+
+/*
+ * Hide all the free space in this AG. Caller must hold both the AGI and the
+ * AGF buffers or have otherwise prevented concurrent access.
+ *
+ * Subtracts this AG's current contribution (xfs_ag_fdblocks) from the
+ * filesystem's fdblocks and then marks the perag as noalloc.
+ *
+ * Returns 0 if the AG was already in noalloc mode or was switched
+ * successfully; otherwise returns the error from xfs_mod_fdblocks, in which
+ * case the noalloc flag is left clear.
+ */
+int
+xfs_ag_set_noalloc(
+ struct xfs_perag *pag)
+{
+ struct xfs_mount *mp = pag->pag_mount;
+ int error;
+
+ ASSERT(pag->pagf_init);
+ ASSERT(pag->pagi_init);
+
+ /* Already hidden; do not subtract the contribution a second time. */
+ if (pag->pagf_noalloc)
+ return 0;
+
+ /* Widen to a signed 64-bit value before negating the unsigned count. */
+ error = xfs_mod_fdblocks(mp, -(int64_t)xfs_ag_fdblocks(pag), false);
+ if (error)
+ return error;
+
+ /* Only flag the AG once the fdblocks adjustment has succeeded. */
+ trace_xfs_ag_set_noalloc(pag);
+ pag->pagf_noalloc = 1;
+ return 0;
+}
+
+/*
+ * Unhide all the free space in this AG. Caller must hold both the AGI and
+ * the AGF buffers or have otherwise prevented concurrent access.
+ *
+ * Adds this AG's current contribution (xfs_ag_fdblocks) back to the
+ * filesystem's fdblocks and clears the noalloc flag.  Does nothing if the AG
+ * is not in noalloc mode.
+ */
+void
+xfs_ag_clear_noalloc(
+ struct xfs_perag *pag)
+{
+ struct xfs_mount *mp = pag->pag_mount;
+
+ /* Not hidden; there is nothing to give back. */
+ if (!pag->pagf_noalloc)
+ return;
+
+ /*
+  * The return value is not checked here.
+  * NOTE(review): presumably adding blocks cannot fail -- confirm against
+  * xfs_mod_fdblocks.
+  */
+ xfs_mod_fdblocks(mp, xfs_ag_fdblocks(pag), false);
+
+ trace_xfs_ag_clear_noalloc(pag);
+ pag->pagf_noalloc = 0;
+}
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 6088a3389e4d..c9e198e62b74 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -38,6 +38,7 @@ struct xfs_perag {
uint8_t pagf_metadata:1; /* the agf is preferred to be metadata */
uint8_t pagi_inodeok:1; /* The agi is ok for inodes */
uint8_t pagf_agflreset:1; /* agfl requires reset before use */
+ uint8_t pagf_noalloc:1; /* Do not allocate space from AG */
uint8_t pagf_levels[XFS_BTNUM_AGF];
/* # of levels in bno & cnt btree */
uint32_t pagf_flcount; /* count of blocks in freelist */
@@ -114,6 +115,10 @@ struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
unsigned int tag);
void xfs_perag_put(struct xfs_perag *pag);
+xfs_extlen_t xfs_ag_fdblocks(struct xfs_perag *pag);
+int xfs_ag_set_noalloc(struct xfs_perag *pag);
+void xfs_ag_clear_noalloc(struct xfs_perag *pag);
+
/*
* Perag iteration APIs
*
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index d51b62494c2f..9c7561aceb95 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -74,6 +74,13 @@ xfs_ag_resv_critical(
xfs_extlen_t orig;
xfs_extlen_t btree_maxlevels;
+ /*
+ * Pretend we're critically low on reservations in this AG to scare
+ * everyone else away.
+ */
+ if (pag->pagf_noalloc)
+ return true;
+
switch (type) {
case XFS_AG_RESV_METADATA:
avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
@@ -116,7 +123,12 @@ xfs_ag_resv_needed(
break;
case XFS_AG_RESV_IMETA:
case XFS_AG_RESV_NONE:
- /* empty */
+ /*
+ * In noalloc mode, we pretend that all the free blocks in this
+ * AG have been allocated. Make this AG look full.
+ */
+ if (pag->pagf_noalloc)
+ len += xfs_ag_fdblocks(pag);
break;
default:
ASSERT(0);
@@ -345,6 +357,8 @@ xfs_ag_resv_alloc_extent(
xfs_extlen_t len;
uint field;
+ ASSERT(type != XFS_AG_RESV_NONE || !pag->pagf_noalloc);
+
trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
switch (type) {
@@ -402,7 +416,13 @@ xfs_ag_resv_free_extent(
ASSERT(0);
fallthrough;
case XFS_AG_RESV_NONE:
- xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
+ /*
+ * If the AG is in noalloc mode, we don't give back to fdblocks
+ * until we lift noalloc mode.
+ */
+ if (!pag->pagf_noalloc)
+ xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS,
+ (int64_t)len);
fallthrough;
case XFS_AG_RESV_IGNORE:
return;
@@ -415,6 +435,6 @@ xfs_ag_resv_free_extent(
/* Freeing into the reserved pool only requires on-disk update... */
xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
/* ...but freeing beyond that requires in-core and on-disk update. */
- if (len > leftover)
+ if (len > leftover && !pag->pagf_noalloc)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
}
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 22bb734fb81a..7fa2d1fdcc1b 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -240,7 +240,8 @@ retry:
*/
fsc->fdblocks -= pag->pag_meta_resv.ar_reserved;
fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved;
-
+ if (pag->pagf_noalloc)
+ fsc->fdblocks -= xfs_ag_fdblocks(pag);
}
if (pag)
xfs_perag_put(pag);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 4bbea04e77bc..c26deaf720a7 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -616,6 +616,14 @@ xfs_fs_unreserve_ag_blocks(
if (xfs_has_realtime(mp))
xfs_rt_resv_free(mp);
- for_each_perag(mp, agno, pag)
+ for_each_perag(mp, agno, pag) {
+ /*
+ * Bring the AG back online because our AG hiding only exists
+ * in-core and we need the superblock to be written out with
+ * the super fdblocks reflecting the AGF freeblks. Do this
+ * before adding the per-AG reservations back to fdblocks.
+ */
+ xfs_ag_clear_noalloc(pag);
xfs_ag_resv_free(pag);
+ }
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index a1fbd56ea7fc..8dc613043c5f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -331,6 +331,7 @@ xfs_set_inode_alloc(
pag->pagi_inodeok = 1;
pag->pagf_metadata = 0;
}
+ pag->pagf_noalloc = 0;
xfs_perag_put(pag);
}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e74ad18d95da..4bfe4de28f85 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -4112,6 +4112,52 @@ DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_unfixed_corruption);
+/*
+ * Event class for the AG noalloc tracepoints.  Each event records the device
+ * and AG number, the perag counters that xfs_ag_fdblocks() combines
+ * (pagf_freeblks, pagf_flcount, pagf_btreeblks, the metadata reservation's
+ * ar_reserved and the rmapbt reservation's ar_orig_reserved), and the
+ * mount's m_resblks / m_resblks_avail values at the time of the call.
+ */
+DECLARE_EVENT_CLASS(xfs_ag_noalloc_class,
+ TP_PROTO(struct xfs_perag *pag),
+ TP_ARGS(pag),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_extlen_t, freeblks)
+ __field(xfs_extlen_t, flcount)
+ __field(xfs_extlen_t, btreeblks)
+ __field(xfs_extlen_t, meta_resv)
+ __field(xfs_extlen_t, rmap_resv)
+
+ __field(unsigned long long, resblks)
+ __field(unsigned long long, resblks_avail)
+ ),
+ TP_fast_assign(
+ __entry->dev = pag->pag_mount->m_super->s_dev;
+ __entry->agno = pag->pag_agno;
+ __entry->freeblks = pag->pagf_freeblks;
+ __entry->flcount = pag->pagf_flcount;
+ __entry->btreeblks = pag->pagf_btreeblks;
+ __entry->meta_resv = pag->pag_meta_resv.ar_reserved;
+ __entry->rmap_resv = pag->pag_rmapbt_resv.ar_orig_reserved;
+
+ __entry->resblks = pag->pag_mount->m_resblks;
+ __entry->resblks_avail = pag->pag_mount->m_resblks_avail;
+ ),
+ TP_printk("dev %d:%d agno 0x%x freeblks %u flcount %u btreeblks %u metaresv %u rmapresv %u resblks %llu resblks_avail %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->freeblks,
+ __entry->flcount,
+ __entry->btreeblks,
+ __entry->meta_resv,
+ __entry->rmap_resv,
+ __entry->resblks,
+ __entry->resblks_avail)
+);
+/* Define one tracepoint of the noalloc class; each takes only the perag. */
+#define DEFINE_AG_NOALLOC_EVENT(name) \
+DEFINE_EVENT(xfs_ag_noalloc_class, name, \
+ TP_PROTO(struct xfs_perag *pag), \
+ TP_ARGS(pag))
+
+/* Emitted by xfs_ag_set_noalloc() and xfs_ag_clear_noalloc() respectively. */
+DEFINE_AG_NOALLOC_EVENT(xfs_ag_set_noalloc);
+DEFINE_AG_NOALLOC_EVENT(xfs_ag_clear_noalloc);
+
+
TRACE_EVENT(xfs_iwalk_ag,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agino_t startino),