summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2019-08-30 15:44:47 -0700
committerDarrick J. Wong <darrick.wong@oracle.com>2019-10-09 09:39:19 -0700
commit45960191ce657f46ecdbadae82d015171f1c2d7c (patch)
tree809eb678257f9f3df2a87e557c091feb02e2f5e7
parentea845040b6a4d6c54a845e8f366c3c68ab6f3fb1 (diff)
xfs: implement block reservation accounting for btrees we're staging
Create a new xrep_newbt structure to encapsulate a fake root for creating a staged btree cursor as well as to track all the blocks that we need to reserve in order to build that btree. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r--fs/xfs/scrub/repair.c260
-rw-r--r--fs/xfs/scrub/repair.h61
-rw-r--r--fs/xfs/scrub/trace.h58
3 files changed, 379 insertions, 0 deletions
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 588bc054db5c..beebd484c5f3 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -359,6 +359,266 @@ xrep_init_btblock(
return 0;
}
+/* Initialize accounting resources for staging a new AG btree. */
+void
+xrep_newbt_init_ag(
+ struct xrep_newbt *xnr,
+ struct xfs_scrub *sc,
+ const struct xfs_owner_info *oinfo,
+ xfs_fsblock_t alloc_hint,
+ enum xfs_ag_resv_type resv)
+{
+ memset(xnr, 0, sizeof(struct xrep_newbt));
+ xnr->sc = sc;
+ xnr->oinfo = *oinfo; /* structure copy */
+ xnr->alloc_hint = alloc_hint;
+ xnr->resv = resv;
+ INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/* Initialize accounting resources for staging a new inode fork btree. */
+void
+xrep_newbt_init_inode(
+ struct xrep_newbt *xnr,
+ struct xfs_scrub *sc,
+ int whichfork,
+ const struct xfs_owner_info *oinfo)
+{
+ memset(xnr, 0, sizeof(struct xrep_newbt));
+ xnr->sc = sc;
+ xnr->oinfo = *oinfo; /* structure copy */
+ xnr->alloc_hint = XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino);
+ xnr->resv = XFS_AG_RESV_NONE;
+ xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
+ xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
+ INIT_LIST_HEAD(&xnr->reservations);
+}
+
+/*
+ * Initialize accounting resources for staging a new btree. Callers are
+ * expected to add their own reservations (and clean them up) manually.
+ */
+void
+xrep_newbt_init_bare(
+ struct xrep_newbt *xnr,
+ struct xfs_scrub *sc)
+{
+ xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
+ XFS_AG_RESV_NONE);
+}
+
+/* Add a space reservation manually. */
+int
+xrep_newbt_add_reservation(
+ struct xrep_newbt *xnr,
+ xfs_fsblock_t fsbno,
+ xfs_extlen_t len)
+{
+ struct xrep_newbt_resv *resv;
+
+ resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
+ if (!resv)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&resv->list);
+ resv->fsbno = fsbno;
+ resv->len = len;
+ resv->used = 0;
+ list_add_tail(&resv->list, &xnr->reservations);
+ return 0;
+}
+
+/* Reserve disk space for our new btree. */
+int
+xrep_newbt_reserve_space(
+ struct xrep_newbt *xnr,
+ uint64_t nr_blocks)
+{
+ struct xfs_scrub *sc = xnr->sc;
+ xfs_alloctype_t type;
+ xfs_fsblock_t alloc_hint = xnr->alloc_hint;
+ int error = 0;
+
+ type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
+
+ while (nr_blocks > 0 && !error) {
+ struct xfs_alloc_arg args = {
+ .tp = sc->tp,
+ .mp = sc->mp,
+ .type = type,
+ .fsbno = alloc_hint,
+ .oinfo = xnr->oinfo,
+ .minlen = 1,
+ .maxlen = nr_blocks,
+ .prod = nr_blocks,
+ .resv = xnr->resv,
+ };
+
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ return error;
+ if (args.fsbno == NULLFSBLOCK)
+ return -ENOSPC;
+
+ trace_xrep_newbt_reserve_space(sc->mp,
+ XFS_FSB_TO_AGNO(sc->mp, args.fsbno),
+ XFS_FSB_TO_AGBNO(sc->mp, args.fsbno),
+ args.len, xnr->oinfo.oi_owner);
+
+ error = xrep_newbt_add_reservation(xnr, args.fsbno, args.len);
+ if (error)
+ break;
+
+ nr_blocks -= args.len;
+ alloc_hint = args.fsbno + args.len - 1;
+
+ if (sc->ip)
+ error = xfs_trans_roll_inode(&sc->tp, sc->ip);
+ else
+ error = xrep_roll_ag_trans(sc);
+ }
+
+ return error;
+}
+
+/* Free all the accounting info and disk space we reserved for a new btree. */
+void
+xrep_newbt_destroy(
+ struct xrep_newbt *xnr,
+ int error)
+{
+ struct xfs_scrub *sc = xnr->sc;
+ struct xrep_newbt_resv *resv, *n;
+
+ if (error)
+ goto junkit;
+
+ list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+ /* Free every block we didn't use. */
+ resv->fsbno += resv->used;
+ resv->len -= resv->used;
+ resv->used = 0;
+
+ if (resv->len > 0) {
+ trace_xrep_newbt_unreserve_space(sc->mp,
+ XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
+ XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
+ resv->len, xnr->oinfo.oi_owner);
+
+ __xfs_bmap_add_free(sc->tp, resv->fsbno, resv->len,
+ &xnr->oinfo, true);
+ }
+
+ list_del(&resv->list);
+ kmem_free(resv);
+ }
+
+junkit:
+ list_for_each_entry_safe(resv, n, &xnr->reservations, list) {
+ list_del(&resv->list);
+ kmem_free(resv);
+ }
+
+ if (sc->ip) {
+ kmem_zone_free(xfs_ifork_zone, xnr->ifake.if_fork);
+ xnr->ifake.if_fork = NULL;
+ }
+}
+
+/* Feed one of the reserved btree blocks to the bulk loader. */
+int
+xrep_newbt_alloc_block(
+ struct xfs_btree_cur *cur,
+ struct xrep_newbt *xnr,
+ union xfs_btree_ptr *ptr)
+{
+ struct xrep_newbt_resv *resv;
+ xfs_fsblock_t fsb;
+
+ /*
+ * If last_resv doesn't have a block for us, move forward until we find
+ * one that does (or run out of reservations).
+ */
+ if (xnr->last_resv == NULL) {
+ list_for_each_entry(resv, &xnr->reservations, list) {
+ if (resv->used < resv->len) {
+ xnr->last_resv = resv;
+ break;
+ }
+ }
+ if (xnr->last_resv == NULL)
+ return -ENOSPC;
+ } else if (xnr->last_resv->used == xnr->last_resv->len) {
+ if (xnr->last_resv->list.next == &xnr->reservations)
+ return -ENOSPC;
+ xnr->last_resv = list_entry(xnr->last_resv->list.next,
+ struct xrep_newbt_resv, list);
+ }
+
+ /* Nab the block. */
+ fsb = xnr->last_resv->fsbno + xnr->last_resv->used;
+ xnr->last_resv->used++;
+
+ trace_xrep_newbt_alloc_block(cur->bc_mp,
+ XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
+ XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
+ xnr->oinfo.oi_owner);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ ptr->l = cpu_to_be64(fsb);
+ else
+ ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
+ return 0;
+}
+
+/*
+ * Estimate proper slack values for a btree that's being reloaded.
+ *
+ * Under most circumstances, we'll take whatever default loading value the
+ * btree bulk loading code calculates for us. However, there are some
+ * exceptions to this rule:
+ *
+ * (1) If someone turned one of the debug knobs.
+ * (2) If this is a per-AG btree and the AG has less than ~9% space free.
+ * (3) If this is an inode btree and the FS has less than ~9% space free.
+ *
+ * Note that we actually use 3/32 for the comparison to avoid division.
+ */
+void
+xrep_bload_estimate_slack(
+ struct xfs_scrub *sc,
+ struct xfs_btree_bload *bload)
+{
+ uint64_t free;
+ uint64_t sz;
+
+ /*
+ * The xfs_globals values are set to -1 (i.e. take the bload defaults)
+ * unless someone has set them otherwise, so we just pull the values
+ * here.
+ */
+ bload->leaf_slack = xfs_globals.bload_leaf_slack;
+ bload->node_slack = xfs_globals.bload_node_slack;
+
+ if (sc->ops->type == ST_PERAG) {
+ free = sc->sa.pag->pagf_freeblks;
+ sz = xfs_ag_block_count(sc->mp, sc->sa.agno);
+ } else {
+ free = percpu_counter_sum(&sc->mp->m_fdblocks);
+ sz = sc->mp->m_sb.sb_dblocks;
+ }
+
+ /* No further changes if there's more than 3/32ths space left. */
+ if (free >= ((sz * 3) >> 5))
+ return;
+
+ /* We're low on space; load the btrees as tightly as possible. */
+ if (bload->leaf_slack < 0)
+ bload->leaf_slack = 0;
+ if (bload->node_slack < 0)
+ bload->node_slack = 0;
+}
+
/*
* Reconstructing per-AG Btrees
*
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 479cfe38065e..ab6c1199ecc0 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -6,6 +6,10 @@
#ifndef __XFS_SCRUB_REPAIR_H__
#define __XFS_SCRUB_REPAIR_H__
+#include "scrub/bitmap.h"
+#include "xfs_btree.h"
+union xfs_btree_ptr;
+
static inline int xrep_notsupported(struct xfs_scrub *sc)
{
return -EOPNOTSUPP;
@@ -59,6 +63,63 @@ int xrep_agf(struct xfs_scrub *sc);
int xrep_agfl(struct xfs_scrub *sc);
int xrep_agi(struct xfs_scrub *sc);
+struct xrep_newbt_resv {
+ /* Link to list of extents that we've reserved. */
+ struct list_head list;
+
+ /* FSB of the block we reserved. */
+ xfs_fsblock_t fsbno;
+
+ /* Length of the reservation. */
+ xfs_extlen_t len;
+
+ /* How much of this reservation we've used. */
+ xfs_extlen_t used;
+};
+
+struct xrep_newbt {
+ struct xfs_scrub *sc;
+
+ /* List of extents that we've reserved. */
+ struct list_head reservations;
+
+ /* Fake root for new btree. */
+ union {
+ struct xbtree_afakeroot afake;
+ struct xbtree_ifakeroot ifake;
+ };
+
+ /* rmap owner of these blocks */
+ struct xfs_owner_info oinfo;
+
+ /* The last reservation we allocated from. */
+ struct xrep_newbt_resv *last_resv;
+
+ /* Allocation hint */
+ xfs_fsblock_t alloc_hint;
+
+ /* per-ag reservation type */
+ enum xfs_ag_resv_type resv;
+};
+
+#define for_each_xrep_newbt_reservation(xnr, resv, n) \
+ list_for_each_entry_safe((resv), (n), &(xnr)->reservations, list)
+
+void xrep_newbt_init_bare(struct xrep_newbt *xba, struct xfs_scrub *sc);
+void xrep_newbt_init_ag(struct xrep_newbt *xba, struct xfs_scrub *sc,
+ const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
+ enum xfs_ag_resv_type resv);
+void xrep_newbt_init_inode(struct xrep_newbt *xba, struct xfs_scrub *sc,
+ int whichfork, const struct xfs_owner_info *oinfo);
+int xrep_newbt_add_reservation(struct xrep_newbt *xba, xfs_fsblock_t fsbno,
+ xfs_extlen_t len);
+int xrep_newbt_reserve_space(struct xrep_newbt *xba, uint64_t nr_blocks);
+void xrep_newbt_destroy(struct xrep_newbt *xba, int error);
+int xrep_newbt_alloc_block(struct xfs_btree_cur *cur, struct xrep_newbt *xba,
+ union xfs_btree_ptr *ptr);
+void xrep_bload_estimate_slack(struct xfs_scrub *sc,
+ struct xfs_btree_bload *bload);
+
#else
static inline int xrep_attempt(
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 3362bae28b46..deb177abf652 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -904,6 +904,64 @@ TRACE_EVENT(xrep_ialloc_insert,
__entry->freemask)
)
+DECLARE_EVENT_CLASS(xrep_newbt_extent_class,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agblock_t agbno, xfs_extlen_t len,
+ int64_t owner),
+ TP_ARGS(mp, agno, agbno, len, owner),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agblock_t, agbno)
+ __field(xfs_extlen_t, len)
+ __field(int64_t, owner)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->agbno = agbno;
+ __entry->len = len;
+ __entry->owner = owner;
+ ),
+ TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->agbno,
+ __entry->len,
+ __entry->owner)
+);
+#define DEFINE_NEWBT_EXTENT_EVENT(name) \
+DEFINE_EVENT(xrep_newbt_extent_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+ xfs_agblock_t agbno, xfs_extlen_t len, \
+ int64_t owner), \
+ TP_ARGS(mp, agno, agbno, len, owner))
+DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_reserve_space);
+DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_unreserve_space);
+
+TRACE_EVENT(xrep_newbt_alloc_block,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agblock_t agbno, int64_t owner),
+ TP_ARGS(mp, agno, agbno, owner),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agblock_t, agbno)
+ __field(int64_t, owner)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->agbno = agbno;
+ __entry->owner = owner;
+ ),
+ TP_printk("dev %d:%d agno %u agbno %u owner %lld",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->agbno,
+ __entry->owner)
+);
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */