summaryrefslogtreecommitdiff
path: root/fs/xfs/scrub/reap.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/scrub/reap.c')
-rw-r--r--fs/xfs/scrub/reap.c388
1 files changed, 312 insertions, 76 deletions
diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c
index df13a9e0fe86..1a0d24295e5f 100644
--- a/fs/xfs/scrub/reap.c
+++ b/fs/xfs/scrub/reap.c
@@ -27,6 +27,10 @@
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_bmap.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_attr_remote.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -76,20 +80,29 @@
*/
/* Information about reaping extents after a repair. */
-struct xrep_reap_state {
+struct xreap_state {
struct xfs_scrub *sc;
/* Reverse mapping owner and metadata reservation type. */
const struct xfs_owner_info *oinfo;
enum xfs_ag_resv_type resv;
+ /* If true, roll the transaction before reaping the next extent. */
+ bool force_roll;
+
/* Number of deferred reaps attached to the current transaction. */
unsigned int deferred;
+
+ /* Number of invalidated buffers logged to the current transaction. */
+ unsigned int invalidated;
+
+ /* Number of deferred reaps queued during the whole reap sequence. */
+ unsigned long long total_deferred;
};
/* Put a block back on the AGFL. */
STATIC int
-xrep_put_freelist(
+xreap_put_freelist(
struct xfs_scrub *sc,
xfs_agblock_t agbno)
{
@@ -126,69 +139,226 @@ xrep_put_freelist(
return 0;
}
-/* Try to invalidate the incore buffer for a block that we're about to free. */
+/* Are there any uncommitted reap operations? */
+static inline bool xreap_dirty(const struct xreap_state *rs)
+{
+ if (rs->force_roll)
+ return true;
+ if (rs->deferred)
+ return true;
+ if (rs->invalidated)
+ return true;
+ if (rs->total_deferred)
+ return true;
+ return false;
+}
+
+#define XREAP_MAX_BINVAL (2048)
+
+/*
+ * Decide if we want to roll the transaction after reaping an extent. We don't
+ * want to overrun the transaction reservation, so we prohibit more than
+ * 128 EFIs per transaction. For the same reason, we limit the number
+ * of buffer invalidations to 2048.
+ */
+static inline bool xreap_want_roll(const struct xreap_state *rs)
+{
+ if (rs->force_roll)
+ return true;
+ if (rs->deferred > XREP_MAX_ITRUNCATE_EFIS)
+ return true;
+ if (rs->invalidated > XREAP_MAX_BINVAL)
+ return true;
+ return false;
+}
+
+static inline void xreap_reset(struct xreap_state *rs)
+{
+ rs->total_deferred += rs->deferred;
+ rs->deferred = 0;
+ rs->invalidated = 0;
+ rs->force_roll = false;
+}
+
+#define XREAP_MAX_DEFER_CHAIN (2048)
+
+/*
+ * Decide if we want to finish the deferred ops that are attached to the scrub
+ * transaction. We don't want to queue huge chains of deferred ops because
+ * that can consume a lot of log space and kernel memory. Hence we trigger a
+ * xfs_defer_finish if there are more than 2048 deferred reap operations or the
+ * caller did some real work.
+ */
+static inline bool
+xreap_want_defer_finish(const struct xreap_state *rs)
+{
+ if (rs->force_roll)
+ return true;
+ if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
+ return true;
+ return false;
+}
+
+static inline void xreap_defer_finish_reset(struct xreap_state *rs)
+{
+ rs->total_deferred = 0;
+ rs->deferred = 0;
+ rs->invalidated = 0;
+ rs->force_roll = false;
+}
+
+/* Try to invalidate the incore buffers for an extent that we're freeing. */
STATIC void
-xrep_block_reap_binval(
- struct xfs_scrub *sc,
- xfs_fsblock_t fsbno)
+xreap_agextent_binval(
+ struct xreap_state *rs,
+ xfs_agblock_t agbno,
+ xfs_extlen_t *aglenp)
{
- struct xfs_buf *bp = NULL;
- int error;
+ struct xfs_scrub *sc = rs->sc;
+ struct xfs_perag *pag = sc->sa.pag;
+ struct xfs_mount *mp = sc->mp;
+ xfs_agnumber_t agno = sc->sa.pag->pag_agno;
+ xfs_agblock_t agbno_next = agbno + *aglenp;
+ xfs_agblock_t bno = agbno;
/*
- * If there's an incore buffer for exactly this block, invalidate it.
* Avoid invalidating AG headers and post-EOFS blocks because we never
* own those.
*/
- if (!xfs_verify_fsbno(sc->mp, fsbno))
+ if (!xfs_verify_agbno(pag, agbno) ||
+ !xfs_verify_agbno(pag, agbno_next - 1))
return;
/*
- * We assume that the lack of any other known owners means that the
- * buffer can be locked without risk of deadlocking.
+ * If there are incore buffers for these blocks, invalidate them. We
+ * assume that the lack of any other known owners means that the buffer
+ * can be locked without risk of deadlocking. The buffer cache cannot
+ * detect aliasing, so employ nested loops to scan for incore buffers
+ * of any plausible size.
*/
- error = xfs_buf_incore(sc->mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(sc->mp, fsbno),
- XFS_FSB_TO_BB(sc->mp, 1), XBF_LIVESCAN, &bp);
- if (error)
- return;
+ while (bno < agbno_next) {
+ xfs_agblock_t fsbcount;
+ xfs_agblock_t max_fsbs;
+
+ /*
+ * Max buffer size is the max remote xattr buffer size, which
+ * is one fs block larger than 64k.
+ */
+ max_fsbs = min_t(xfs_agblock_t, agbno_next - bno,
+ xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX));
+
+ for (fsbcount = 1; fsbcount < max_fsbs; fsbcount++) {
+ struct xfs_buf *bp = NULL;
+ xfs_daddr_t daddr;
+ int error;
+
+ daddr = XFS_AGB_TO_DADDR(mp, agno, bno);
+ error = xfs_buf_incore(mp->m_ddev_targp, daddr,
+ XFS_FSB_TO_BB(mp, fsbcount),
+ XBF_LIVESCAN, &bp);
+ if (error)
+ continue;
+
+ xfs_trans_bjoin(sc->tp, bp);
+ xfs_trans_binval(sc->tp, bp);
+ rs->invalidated++;
+
+ /*
+ * Stop invalidating if we've hit the limit; we should
+ * still have enough reservation left to free however
+ * far we've gotten.
+ */
+ if (rs->invalidated > XREAP_MAX_BINVAL) {
+ *aglenp -= agbno_next - bno;
+ goto out;
+ }
+ }
+
+ bno++;
+ }
- xfs_trans_bjoin(sc->tp, bp);
- xfs_trans_binval(sc->tp, bp);
+out:
+ trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
}
-/* Dispose of a single block. */
+/*
+ * Figure out the longest run of blocks that we can dispose of with a single
+ * call. Cross-linked blocks should have their reverse mappings removed, but
+ * single-owner extents can be freed. AGFL blocks can only be put back one at
+ * a time.
+ */
STATIC int
-xrep_reap_block(
- uint64_t fsbno,
- void *priv)
+xreap_agextent_select(
+ struct xreap_state *rs,
+ xfs_agblock_t agbno,
+ xfs_agblock_t agbno_next,
+ bool *crosslinked,
+ xfs_extlen_t *aglenp)
{
- struct xrep_reap_state *rs = priv;
- struct xfs_scrub *sc = rs->sc;
- struct xfs_btree_cur *cur;
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- bool has_other_rmap;
- bool need_roll = true;
- int error;
+ struct xfs_scrub *sc = rs->sc;
+ struct xfs_btree_cur *cur;
+ xfs_agblock_t bno = agbno + 1;
+ xfs_extlen_t len = 1;
+ int error;
- agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
- agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
+ /*
+ * Determine if there are any other rmap records covering the first
+ * block of this extent. If so, the block is crosslinked.
+ */
+ cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
+ sc->sa.pag);
+ error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
+ crosslinked);
+ if (error)
+ goto out_cur;
- /* We don't support reaping file extents yet. */
- if (sc->ip != NULL || sc->sa.pag->pag_agno != agno) {
- ASSERT(0);
- return -EFSCORRUPTED;
- }
+ /* AGFL blocks can only be deal with one at a time. */
+ if (rs->resv == XFS_AG_RESV_AGFL)
+ goto out_found;
- cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp, sc->sa.pag);
+ /*
+ * Figure out how many of the subsequent blocks have the same crosslink
+ * status.
+ */
+ while (bno < agbno_next) {
+ bool also_crosslinked;
- /* Can we find any other rmappings? */
- error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
- &has_other_rmap);
+ error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
+ &also_crosslinked);
+ if (error)
+ goto out_cur;
+
+ if (*crosslinked != also_crosslinked)
+ break;
+
+ len++;
+ bno++;
+ }
+
+out_found:
+ *aglenp = len;
+ trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
+out_cur:
xfs_btree_del_cursor(cur, error);
- if (error)
- return error;
+ return error;
+}
+
+/*
+ * Dispose of as much of the beginning of this AG extent as possible. The
+ * number of blocks disposed of will be returned in @aglenp.
+ */
+STATIC int
+xreap_agextent_iter(
+ struct xreap_state *rs,
+ xfs_agblock_t agbno,
+ xfs_extlen_t *aglenp,
+ bool crosslinked)
+{
+ struct xfs_scrub *sc = rs->sc;
+ xfs_fsblock_t fsbno;
+ int error = 0;
+
+ fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);
/*
* If there are other rmappings, this block is cross linked and must
@@ -203,55 +373,117 @@ xrep_reap_block(
* blow on writeout, the filesystem will shut down, and the admin gets
* to run xfs_repair.
*/
- if (has_other_rmap) {
- trace_xrep_dispose_unmap_extent(sc->sa.pag, agbno, 1);
+ if (crosslinked) {
+ trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);
+
+ rs->force_roll = true;
+ return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
+ *aglenp, rs->oinfo);
+ }
+
+ trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);
- error = xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
- 1, rs->oinfo);
+ /*
+ * Invalidate as many buffers as we can, starting at agbno. If this
+ * function sets *aglenp to zero, the transaction is full of logged
+ * buffer invalidations, so we need to return early so that we can
+ * roll and retry.
+ */
+ xreap_agextent_binval(rs, agbno, aglenp);
+ if (*aglenp == 0) {
+ ASSERT(xreap_want_roll(rs));
+ return 0;
+ }
+
+ /* Put blocks back on the AGFL one at a time. */
+ if (rs->resv == XFS_AG_RESV_AGFL) {
+ ASSERT(*aglenp == 1);
+ error = xreap_put_freelist(sc, agbno);
if (error)
return error;
- goto roll_out;
+ rs->force_roll = true;
+ return 0;
+ }
+
+ /*
+ * Use deferred frees to get rid of the old btree blocks to try to
+ * minimize the window in which we could crash and lose the old blocks.
+ */
+ error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
+ rs->resv, true);
+ if (error)
+ return error;
+
+ rs->deferred++;
+ return 0;
+}
+
+/*
+ * Break an AG metadata extent into sub-extents by fate (crosslinked, not
+ * crosslinked), and dispose of each sub-extent separately.
+ */
+STATIC int
+xreap_agmeta_extent(
+ uint64_t fsbno,
+ uint64_t len,
+ void *priv)
+{
+ struct xreap_state *rs = priv;
+ struct xfs_scrub *sc = rs->sc;
+ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
+ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
+ xfs_agblock_t agbno_next = agbno + len;
+ int error = 0;
+
+ ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
+ ASSERT(sc->ip == NULL);
+
+ if (agno != sc->sa.pag->pag_agno) {
+ ASSERT(sc->sa.pag->pag_agno == agno);
+ return -EFSCORRUPTED;
}
- trace_xrep_dispose_free_extent(sc->sa.pag, agbno, 1);
+ while (agbno < agbno_next) {
+ xfs_extlen_t aglen;
+ bool crosslinked;
- xrep_block_reap_binval(sc, fsbno);
+ error = xreap_agextent_select(rs, agbno, agbno_next,
+ &crosslinked, &aglen);
+ if (error)
+ return error;
- if (rs->resv == XFS_AG_RESV_AGFL) {
- error = xrep_put_freelist(sc, agbno);
- } else {
- /*
- * Use deferred frees to get rid of the old btree blocks to try
- * to minimize the window in which we could crash and lose the
- * old blocks. However, we still need to roll the transaction
- * every 100 or so EFIs so that we don't exceed the log
- * reservation.
- */
- error = __xfs_free_extent_later(sc->tp, fsbno, 1, rs->oinfo,
- rs->resv, true);
+ error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
if (error)
return error;
- rs->deferred++;
- need_roll = rs->deferred > 100;
+
+ if (xreap_want_defer_finish(rs)) {
+ error = xrep_defer_finish(sc);
+ if (error)
+ return error;
+ xreap_defer_finish_reset(rs);
+ } else if (xreap_want_roll(rs)) {
+ error = xrep_roll_ag_trans(sc);
+ if (error)
+ return error;
+ xreap_reset(rs);
+ }
+
+ agbno += aglen;
}
- if (error || !need_roll)
- return error;
-roll_out:
- rs->deferred = 0;
- return xrep_roll_ag_trans(sc);
+ return 0;
}
-/* Dispose of every block of every extent in the bitmap. */
+/* Dispose of every block of every AG metadata extent in the bitmap. */
int
-xrep_reap_extents(
+xrep_reap_ag_metadata(
struct xfs_scrub *sc,
struct xbitmap *bitmap,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type)
{
- struct xrep_reap_state rs = {
+ struct xreap_state rs = {
.sc = sc,
.oinfo = oinfo,
.resv = type,
@@ -259,10 +491,14 @@ xrep_reap_extents(
int error;
ASSERT(xfs_has_rmapbt(sc->mp));
+ ASSERT(sc->ip == NULL);
- error = xbitmap_walk_bits(bitmap, xrep_reap_block, &rs);
- if (error || rs.deferred == 0)
+ error = xbitmap_walk(bitmap, xreap_agmeta_extent, &rs);
+ if (error)
return error;
- return xrep_roll_ag_trans(sc);
+ if (xreap_dirty(&rs))
+ return xrep_defer_finish(sc);
+
+ return 0;
}