summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Darrick J. Wong <djwong@kernel.org> 2022-07-14 11:15:27 -0700
committer Darrick J. Wong <djwong@kernel.org> 2022-11-09 19:08:00 -0800
commit 0559ac6715264a524ffb84b62021ecde383d26b6 (patch)
tree 215890438839590fb4d6e1e47b45a4e702e6ab0e
parent 4ae7e45a0598e55ecc5bd4a966c5ee0aec2c7ab1 (diff)
xfs: support logging EFIs for realtime extents
Teach the EFI mechanism how to free realtime extents. We do this very sneakily, by using the upper bit of the length field in the log format (and a boolean flag incore) to convey the realtime status. We're going to need this to enforce proper ordering of operations when we enable realtime rmap.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- fs/xfs/libxfs/xfs_alloc.c | 35
-rw-r--r-- fs/xfs/libxfs/xfs_alloc.h | 17
-rw-r--r-- fs/xfs/libxfs/xfs_defer.c | 1
-rw-r--r-- fs/xfs/libxfs/xfs_defer.h | 1
-rw-r--r-- fs/xfs/libxfs/xfs_log_format.h | 7
-rw-r--r-- fs/xfs/xfs_extfree_item.c | 68
6 files changed, 116 insertions, 13 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index e23b55f92e04..2cd8f5ce6755 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2607,6 +2607,7 @@ xfs_free_extent_later(
{
struct xfs_extent_free_item *xefi;
struct xfs_mount *mp = tp->t_mountp;
+ enum xfs_defer_ops_type optype;
#ifdef DEBUG
xfs_agnumber_t agno;
xfs_agblock_t agbno;
@@ -2615,12 +2616,19 @@ xfs_free_extent_later(
ASSERT(len > 0);
ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
ASSERT(!isnullstartblock(bno));
- agno = XFS_FSB_TO_AGNO(mp, bno);
- agbno = XFS_FSB_TO_AGBNO(mp, bno);
- ASSERT(agno < mp->m_sb.sb_agcount);
- ASSERT(agbno < mp->m_sb.sb_agblocks);
- ASSERT(len < mp->m_sb.sb_agblocks);
- ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
+ if (flags & XFS_FREE_EXTENT_REALTIME) {
+ ASSERT(bno < mp->m_sb.sb_rblocks);
+ ASSERT(len <= mp->m_sb.sb_rblocks);
+ ASSERT(bno + len <= mp->m_sb.sb_rblocks);
+ } else {
+ agno = XFS_FSB_TO_AGNO(mp, bno);
+ agbno = XFS_FSB_TO_AGBNO(mp, bno);
+
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(agbno < mp->m_sb.sb_agblocks);
+ ASSERT(len < mp->m_sb.sb_agblocks);
+ ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
+ }
#endif
ASSERT(!(flags & ~XFS_FREE_EXTENT_ALL_FLAGS));
ASSERT(xfs_extfree_item_cache != NULL);
@@ -2631,6 +2639,19 @@ xfs_free_extent_later(
xefi->xefi_blockcount = (xfs_extlen_t)len;
if (flags & XFS_FREE_EXTENT_SKIP_DISCARD)
xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
+ if (flags & XFS_FREE_EXTENT_REALTIME) {
+ /*
+ * Realtime and data section EFIs must use separate
+ * transactions to finish deferred work because updates to
+ * realtime metadata files can lock AGFs to allocate btree
+ * blocks and we don't want that mixing with the AGF locks
+ * taken to finish data section EFIs.
+ */
+ optype = XFS_DEFER_OPS_TYPE_FREE_RT;
+ xefi->xefi_flags |= XFS_EFI_REALTIME;
+ } else {
+ optype = XFS_DEFER_OPS_TYPE_FREE;
+ }
if (oinfo) {
ASSERT(oinfo->oi_offset == 0);
@@ -2646,7 +2667,7 @@ xfs_free_extent_later(
trace_xfs_extent_free_defer(mp, xefi);
xfs_extent_free_get_group(mp, xefi);
- xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
+ xfs_defer_add(tp, optype, &xefi->xefi_list);
}
#ifdef DEBUG
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 19c5f046c3c4..cd7b26568a33 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -228,7 +228,11 @@ void xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
/* Don't issue a discard for the blocks freed. */
#define XFS_FREE_EXTENT_SKIP_DISCARD (1U << 0)
-#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD)
+/* Free blocks on the realtime device. */
+#define XFS_FREE_EXTENT_REALTIME (1U << 1)
+
+#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD | \
+ XFS_FREE_EXTENT_REALTIME)
/*
* List of extents to be free "later".
@@ -239,7 +243,10 @@ struct xfs_extent_free_item {
uint64_t xefi_owner;
xfs_fsblock_t xefi_startblock;/* starting fs block number */
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
- struct xfs_perag *xefi_pag;
+ union {
+ struct xfs_perag *xefi_pag;
+ struct xfs_rtgroup *xefi_rtg;
+ };
unsigned int xefi_flags;
};
@@ -249,6 +256,12 @@ void xfs_extent_free_get_group(struct xfs_mount *mp,
#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
+#define XFS_EFI_REALTIME (1U << 3) /* freeing realtime extent */
+
+static inline bool xfs_efi_is_realtime(const struct xfs_extent_free_item *xefi)
+{
+ return xefi->xefi_flags & XFS_EFI_REALTIME;
+}
extern struct kmem_cache *xfs_extfree_item_cache;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 1619b9b928db..c0416bae880a 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -188,6 +188,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
[XFS_DEFER_OPS_TYPE_REFCOUNT] = &xfs_refcount_update_defer_type,
[XFS_DEFER_OPS_TYPE_RMAP] = &xfs_rmap_update_defer_type,
[XFS_DEFER_OPS_TYPE_FREE] = &xfs_extent_free_defer_type,
+ [XFS_DEFER_OPS_TYPE_FREE_RT] = &xfs_extent_free_defer_type,
[XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type,
[XFS_DEFER_OPS_TYPE_ATTR] = &xfs_attr_defer_type,
[XFS_DEFER_OPS_TYPE_SWAPEXT] = &xfs_swapext_defer_type,
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index bcc48b0c75c9..52198c7124c6 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -19,6 +19,7 @@ enum xfs_defer_ops_type {
XFS_DEFER_OPS_TYPE_RMAP,
XFS_DEFER_OPS_TYPE_FREE,
XFS_DEFER_OPS_TYPE_AGFL_FREE,
+ XFS_DEFER_OPS_TYPE_FREE_RT,
XFS_DEFER_OPS_TYPE_ATTR,
XFS_DEFER_OPS_TYPE_SWAPEXT,
XFS_DEFER_OPS_TYPE_MAX,
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 378201a70028..f3c8257a7545 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -592,6 +592,13 @@ typedef struct xfs_extent {
} xfs_extent_t;
/*
+ * This EFI extent describes a realtime extent. We can never free more than
+ * XFS_MAX_BMBT_EXTLEN (2^21) blocks at a time, so we know that the upper bits
+ * of ext_len cannot be used.
+ */
+#define XFS_EFI_EXTLEN_REALTIME_EXT (1U << 31)
+
+/*
* Since an xfs_extent_t has types (start:64, len: 32)
* there are different alignments on 32 bit and 64 bit kernels.
* So we provide the different variants for use by a
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index cf8f025f78b5..8d11c4e658e0 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -25,6 +25,10 @@
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
struct kmem_cache *xfs_efi_cache;
struct kmem_cache *xfs_efd_cache;
@@ -363,9 +367,17 @@ xfs_trans_free_extent(
trace_xfs_extent_free_deferred(mp, xefi);
- error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
- xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
- xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
+ if (xfs_efi_is_realtime(xefi)) {
+ ASSERT(xefi->xefi_owner == XFS_RMAP_OWN_NULL ||
+ xefi->xefi_owner == XFS_RMAP_OWN_UNKNOWN);
+
+ error = xfs_rtfree_blocks(tp, xefi->xefi_startblock,
+ xefi->xefi_blockcount);
+ } else {
+ error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
+ xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
+ xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
+ }
/*
* Mark the transaction dirty, even on error. This ensures the
@@ -400,6 +412,11 @@ xfs_extent_free_diff_items(
ra = container_of(a, struct xfs_extent_free_item, xefi_list);
rb = container_of(b, struct xfs_extent_free_item, xefi_list);
+ ASSERT(xfs_efi_is_realtime(ra) == xfs_efi_is_realtime(rb));
+
+ if (xfs_efi_is_realtime(ra))
+ return ra->xefi_rtg->rtg_rgno - rb->xefi_rtg->rtg_rgno;
+
return ra->xefi_pag->pag_agno - rb->xefi_pag->pag_agno;
}
@@ -426,6 +443,8 @@ xfs_extent_free_log_item(
extp = &efip->efi_format.efi_extents[next_extent];
extp->ext_start = xefi->xefi_startblock;
extp->ext_len = xefi->xefi_blockcount;
+ if (xfs_efi_is_realtime(xefi))
+ extp->ext_len |= XFS_EFI_EXTLEN_REALTIME_EXT;
}
static struct xfs_log_item *
@@ -467,6 +486,14 @@ xfs_extent_free_get_group(
{
xfs_agnumber_t agno;
+ if (xfs_efi_is_realtime(xefi)) {
+ xfs_rgnumber_t rgno;
+
+ rgno = xfs_rtb_to_rgno(mp, xefi->xefi_startblock);
+ xefi->xefi_rtg = xfs_rtgroup_get(mp, rgno);
+ return;
+ }
+
agno = XFS_FSB_TO_AGNO(mp, xefi->xefi_startblock);
xefi->xefi_pag = xfs_perag_get(mp, agno);
xfs_perag_bump_intents(xefi->xefi_pag);
@@ -477,6 +504,11 @@ static inline void
xfs_extent_free_put_group(
struct xfs_extent_free_item *xefi)
{
+ if (xfs_efi_is_realtime(xefi)) {
+ xfs_rtgroup_put(xefi->xefi_rtg);
+ return;
+ }
+
xfs_perag_drop_intents(xefi->xefi_pag);
xfs_perag_put(xefi->xefi_pag);
}
@@ -494,6 +526,15 @@ xfs_extent_free_finish_item(
xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
+ /*
+ * Lock the rt bitmap if we've any realtime extents to free and we
+ * haven't locked the rt inodes yet.
+ */
+ if (*state == NULL && xfs_efi_is_realtime(xefi)) {
+ xfs_rtbitmap_lock(tp, tp->t_mountp);
+ *state = (struct xfs_btree_cur *)1;
+ }
+
error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi);
xfs_extent_free_put_group(xefi);
@@ -554,6 +595,7 @@ xfs_agfl_free_finish_item(
xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
ASSERT(xefi->xefi_blockcount == 1);
+ ASSERT(!xfs_efi_is_realtime(xefi));
agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock);
oinfo.oi_owner = xefi->xefi_owner;
@@ -602,6 +644,10 @@ xfs_efi_validate_ext(
struct xfs_mount *mp,
struct xfs_extent *extp)
{
+ if (extp->ext_len & XFS_EFI_EXTLEN_REALTIME_EXT)
+ return xfs_verify_rtbext(mp, extp->ext_start,
+ extp->ext_len & ~XFS_EFI_EXTLEN_REALTIME_EXT);
+
return xfs_verify_fsbext(mp, extp->ext_start, extp->ext_len);
}
@@ -642,15 +688,29 @@ xfs_efi_item_recover(
return error;
efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
+ /* Lock the rt bitmap if we've any realtime extents to free. */
+ for (i = 0; i < efip->efi_format.efi_nextents; i++) {
+ extp = &efip->efi_format.efi_extents[i];
+ if (extp->ext_len & XFS_EFI_EXTLEN_REALTIME_EXT) {
+ xfs_rtbitmap_lock(tp, mp);
+ break;
+ }
+ }
+
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
struct xfs_extent_free_item fake = {
.xefi_owner = XFS_RMAP_OWN_UNKNOWN,
};
+ unsigned int len;
extp = &efip->efi_format.efi_extents[i];
fake.xefi_startblock = extp->ext_start;
- fake.xefi_blockcount = extp->ext_len;
+ /* Read ext_len only now that extp points at extent i. */
+ len = extp->ext_len;
+ if (len & XFS_EFI_EXTLEN_REALTIME_EXT)
+ fake.xefi_flags |= XFS_EFI_REALTIME;
+ fake.xefi_blockcount = len & ~XFS_EFI_EXTLEN_REALTIME_EXT;
xfs_extent_free_get_group(mp, &fake);
error = xfs_trans_free_extent(tp, efdp, &fake);