summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Darrick J. Wong <djwong@kernel.org> 2021-09-01 11:18:02 -0700
committer Darrick J. Wong <djwong@kernel.org> 2021-12-15 17:29:18 -0800
commit 22291cfb1132e89552db38399ab78dd3bfd0c940 (patch)
tree d89c4cc246e233d2b69acc75a5b66c1a43fcd2f3
parent c6b4c6212e2a527cbb2cb0097da62a8a0051fc55 (diff)
xfs: create a noalloc mode for allocation groups
Create a new noalloc state for the per-AG structure that will disable block allocation in this AG. We accomplish this by subtracting from fdblocks all the free blocks in this AG, hiding those blocks from the allocator, and preventing freed blocks from updating fdblocks until we're ready to lift noalloc mode.

Note that we reduce the free block count of the filesystem so that we can prevent transactions from entering the allocator looking for "free" space that we've turned off incore.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- fs/xfs/libxfs/xfs_ag.c 60
-rw-r--r-- fs/xfs/libxfs/xfs_ag.h 5
-rw-r--r-- fs/xfs/libxfs/xfs_ag_resv.c 26
-rw-r--r-- fs/xfs/scrub/fscounters.c 3
-rw-r--r-- fs/xfs/xfs_fsops.c 10
-rw-r--r-- fs/xfs/xfs_super.c 1
-rw-r--r-- fs/xfs/xfs_trace.h 46
7 files changed, 146 insertions, 5 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 538d84371a6b..3101ffc08316 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -987,3 +987,63 @@ out_agi:
xfs_buf_relse(agi_bp);
return error;
}
+
+/* Number of blocks this AG contributes to fdblocks; pagf_init must be set. */
+xfs_extlen_t
+xfs_ag_fdblocks(
+ struct xfs_perag *pag)
+{
+ xfs_extlen_t ret;
+
+ ASSERT(pag->pagf_init != 0);
+ /* Free + freelist + btree blocks, less the meta and rmapbt reservations. */
+ ret = pag->pagf_freeblks + pag->pagf_flcount + pag->pagf_btreeblks;
+ ret -= pag->pag_meta_resv.ar_reserved;
+ ret -= pag->pag_rmapbt_resv.ar_orig_reserved;
+ return ret;
+}
+
+/*
+ * Hide all the free space in this AG.  Caller must hold both the AGI and the
+ * AGF buffers or otherwise prevent concurrent access.  Returns 0 on success.
+ */
+int
+xfs_ag_set_noalloc(
+ struct xfs_perag *pag)
+{
+ struct xfs_mount *mp = pag->pag_mount;
+ int error;
+
+ ASSERT(pag->pagf_init);
+ ASSERT(pag->pagi_init);
+
+ if (pag->pagf_noalloc)
+ return 0;
+ /* Take this AG's contribution out of fdblocks; give up if that fails. */
+ error = xfs_mod_fdblocks(mp, -(int64_t)xfs_ag_fdblocks(pag), false);
+ if (error)
+ return error;
+
+ trace_xfs_ag_set_noalloc(pag);
+ pag->pagf_noalloc = 1;
+ return 0;
+}
+
+/*
+ * Unhide all the free space in this AG.  Caller must hold both the AGI and
+ * the AGF buffers or have otherwise prevented concurrent access.
+ */
+void
+xfs_ag_clear_noalloc(
+ struct xfs_perag *pag)
+{
+ struct xfs_mount *mp = pag->pag_mount;
+
+ if (!pag->pagf_noalloc)
+ return;
+ /* Give this AG's current contribution (recomputed now) back to fdblocks. */
+ xfs_mod_fdblocks(mp, xfs_ag_fdblocks(pag), false);
+
+ trace_xfs_ag_clear_noalloc(pag);
+ pag->pagf_noalloc = 0;
+}
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 94761fcc0aa4..aae98fda12f6 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -38,6 +38,7 @@ struct xfs_perag {
uint8_t pagf_metadata:1; /* the agf is preferred to be metadata */
uint8_t pagi_inodeok:1; /* The agi is ok for inodes */
uint8_t pagf_agflreset:1; /* agfl requires reset before use */
+ uint8_t pagf_noalloc:1; /* Do not allocate space from AG */
uint8_t pagf_levels[XFS_BTNUM_AGF];
/* # of levels in bno & cnt btree */
uint32_t pagf_flcount; /* count of blocks in freelist */
@@ -116,6 +117,10 @@ struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
unsigned int tag);
void xfs_perag_put(struct xfs_perag *pag);
+xfs_extlen_t xfs_ag_fdblocks(struct xfs_perag *pag);
+int xfs_ag_set_noalloc(struct xfs_perag *pag);
+void xfs_ag_clear_noalloc(struct xfs_perag *pag);
+
/*
* Perag iteration APIs
*/
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 662d48a560b8..75662263d0e0 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -73,6 +73,13 @@ xfs_ag_resv_critical(
xfs_extlen_t avail;
xfs_extlen_t orig;
+ /*
+ * Pretend we're critically low on reservations in this AG to scare
+ * everyone else away.
+ */
+ if (pag->pagf_noalloc)
+ return true;
+
switch (type) {
case XFS_AG_RESV_METADATA:
avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
@@ -115,7 +122,12 @@ xfs_ag_resv_needed(
break;
case XFS_AG_RESV_IMETA:
case XFS_AG_RESV_NONE:
- /* empty */
+ /*
+ * In noalloc mode, we pretend that all the free blocks in this
+ * AG have been allocated. Make this AG look full.
+ */
+ if (pag->pagf_noalloc)
+ len += xfs_ag_fdblocks(pag);
break;
default:
ASSERT(0);
@@ -344,6 +356,8 @@ xfs_ag_resv_alloc_extent(
xfs_extlen_t len;
uint field;
+ ASSERT(type != XFS_AG_RESV_NONE || !pag->pagf_noalloc);
+
trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
switch (type) {
@@ -401,7 +415,13 @@ xfs_ag_resv_free_extent(
ASSERT(0);
fallthrough;
case XFS_AG_RESV_NONE:
- xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
+ /*
+ * If the AG is in noalloc mode, we don't give back to fdblocks
+ * until we lift noalloc mode.
+ */
+ if (!pag->pagf_noalloc)
+ xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS,
+ (int64_t)len);
fallthrough;
case XFS_AG_RESV_IGNORE:
return;
@@ -414,6 +434,6 @@ xfs_ag_resv_free_extent(
/* Freeing into the reserved pool only requires on-disk update... */
xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
/* ...but freeing beyond that requires in-core and on-disk update. */
- if (len > leftover)
+ if (len > leftover && !pag->pagf_noalloc)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
}
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index f7e7ae58b4a4..fc0ae3d60657 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -239,7 +239,8 @@ retry:
*/
fsc->fdblocks -= pag->pag_meta_resv.ar_reserved;
fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved;
-
+ if (pag->pagf_noalloc)
+ fsc->fdblocks -= xfs_ag_fdblocks(pag);
}
if (pag)
xfs_perag_put(pag);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 85e10f23e376..8f984d3e3ce5 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -625,6 +625,14 @@ xfs_fs_unreserve_ag_blocks(
if (xfs_has_realtime(mp))
xfs_rt_resv_free(mp);
- for_each_perag(mp, agno, pag)
+ for_each_perag(mp, agno, pag) {
+ /*
+ * Bring the AG back online because our AG hiding only exists
+ * in-core and we need the superblock to be written out with
+ * the super fdblocks reflecting the AGF freeblks. Do this
+ * before adding the per-AG reservations back to fdblocks.
+ */
+ xfs_ag_clear_noalloc(pag);
xfs_ag_resv_free(pag);
+ }
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 2c0156653058..514f89e01992 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -325,6 +325,7 @@ xfs_set_inode_alloc(
pag->pagi_inodeok = 1;
pag->pagf_metadata = 0;
}
+ pag->pagf_noalloc = 0;
xfs_perag_put(pag);
}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ab1a0e5945da..786c89154bdd 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -4131,6 +4131,52 @@ DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_unfixed_corruption);
+DECLARE_EVENT_CLASS(xfs_ag_noalloc_class,
+ TP_PROTO(struct xfs_perag *pag),
+ TP_ARGS(pag),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_extlen_t, freeblks)
+ __field(xfs_extlen_t, flcount)
+ __field(xfs_extlen_t, btreeblks)
+ __field(xfs_extlen_t, meta_resv)
+ __field(xfs_extlen_t, rmap_resv)
+ /* The remaining two are sampled from the mount, not from this AG. */
+ __field(unsigned long long, resblks)
+ __field(unsigned long long, resblks_avail)
+ ),
+ TP_fast_assign(
+ __entry->dev = pag->pag_mount->m_super->s_dev;
+ __entry->agno = pag->pag_agno;
+ __entry->freeblks = pag->pagf_freeblks;
+ __entry->flcount = pag->pagf_flcount;
+ __entry->btreeblks = pag->pagf_btreeblks;
+ __entry->meta_resv = pag->pag_meta_resv.ar_reserved;
+ __entry->rmap_resv = pag->pag_rmapbt_resv.ar_orig_reserved;
+
+ __entry->resblks = pag->pag_mount->m_resblks;
+ __entry->resblks_avail = pag->pag_mount->m_resblks_avail;
+ ),
+ TP_printk("dev %d:%d agno 0x%x freeblks %u flcount %u btreeblks %u metaresv %u rmapresv %u resblks %llu resblks_avail %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->freeblks,
+ __entry->flcount,
+ __entry->btreeblks,
+ __entry->meta_resv,
+ __entry->rmap_resv,
+ __entry->resblks,
+ __entry->resblks_avail)
+);
+#define DEFINE_AG_NOALLOC_EVENT(name) \
+DEFINE_EVENT(xfs_ag_noalloc_class, name, \
+ TP_PROTO(struct xfs_perag *pag), \
+ TP_ARGS(pag))
+/* Emitted by xfs_ag_set_noalloc() and xfs_ag_clear_noalloc() respectively. */
+DEFINE_AG_NOALLOC_EVENT(xfs_ag_set_noalloc);
+DEFINE_AG_NOALLOC_EVENT(xfs_ag_clear_noalloc);
+
TRACE_EVENT(xfs_iwalk_ag,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agino_t startino),