summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-09-01 11:18:00 -0700
committerDarrick J. Wong <djwong@kernel.org>2021-10-22 16:41:04 -0700
commitc923dcdabb08a1a84c3b76f263696b3c210434b9 (patch)
treeb4de9887c1f6e6027b3ab90a9735bdbefd1a8d99
parentb65c5d4c0bc77a39c44f6045bf6bfb94b8fc7468 (diff)
xfs: online repair of realtime bitmaps
Rebuild the realtime bitmap from the realtime rmap btree.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/scrub/repair.h6
-rw-r--r--fs/xfs/scrub/rtbitmap.c12
-rw-r--r--fs/xfs/scrub/rtbitmap_repair.c305
-rw-r--r--fs/xfs/scrub/scrub.c2
-rw-r--r--fs/xfs/scrub/trace.h58
-rw-r--r--fs/xfs/scrub/xfblob.c4
7 files changed, 384 insertions, 4 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index e340ea4f281f..d63dbf228773 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -197,6 +197,7 @@ xfs-y += $(addprefix scrub/, \
)
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
+ rtbitmap_repair.o \
rtsummary_repair.o \
)
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index fe095d0653da..9869f4f65294 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -79,6 +79,7 @@ int xrep_setup_rtsummary(struct xfs_scrub *sc, unsigned int *resblks);
int xrep_setup_xattr(struct xfs_scrub *sc);
int xrep_setup_directory(struct xfs_scrub *sc);
int xrep_setup_parent(struct xfs_scrub *sc);
+int xrep_setup_rtbitmap(struct xfs_scrub *sc, unsigned int *resblks);
int xrep_xattr_reset_fork(struct xfs_scrub *sc, struct xfs_inode *ip);
@@ -149,8 +150,10 @@ int xrep_quotacheck(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xrep_rtsummary(struct xfs_scrub *sc);
+int xrep_rtbitmap(struct xfs_scrub *sc);
#else
# define xrep_rtsummary xrep_notsupported
+# define xrep_rtbitmap xrep_notsupported
#endif /* CONFIG_XFS_RT */
struct xrep_newbt_resv {
@@ -265,6 +268,8 @@ xrep_setup_rtsummary(struct xfs_scrub *sc, unsigned int *resblks)
return 0;
}
+#define xrep_setup_rtbitmap xrep_setup_rtsummary
+
static inline int
xrep_setup_xattr(
struct xfs_scrub *sc)
@@ -298,6 +303,7 @@ xrep_setup_xattr(
#define xrep_xattr xrep_notsupported
#define xrep_directory xrep_notsupported
#define xrep_parent xrep_notsupported
+#define xrep_rtbitmap xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index b7a5d0cffb0f..77a50d1dafb3 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -20,15 +20,25 @@
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
+#include "scrub/xfile.h"
+#include "scrub/repair.h"
+#include "scrub/tempfile.h"
/* Set us up with the realtime metadata locked. */
int
xchk_setup_rtbitmap(
struct xfs_scrub *sc)
{
+ unsigned int resblks = 0;
int error;
- error = xchk_trans_alloc(sc, 0);
+ if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
+ error = xrep_setup_rtbitmap(sc, &resblks);
+ if (error)
+ return error;
+ }
+
+ error = xchk_trans_alloc(sc, resblks);
if (error)
return error;
diff --git a/fs/xfs/scrub/rtbitmap_repair.c b/fs/xfs/scrub/rtbitmap_repair.c
new file mode 100644
index 000000000000..494460e81acc
--- /dev/null
+++ b/fs/xfs/scrub/rtbitmap_repair.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_swapext.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/xfile.h"
+#include "scrub/tempfile.h"
+
+/* Working state for rebuilding the realtime bitmap from rtrmapbt records. */
+struct xrep_rtbmp {
+ /* Scrub context; supplies the mount, the xfile and the bulk-fill buffer. */
+ struct xfs_scrub *sc;
+
+ /*
+  * The next rt block we expect to see during our rtrmapbt walk.  Any
+  * record that starts beyond this point leaves a gap that is marked free.
+  */
+ xfs_rtblock_t next_rtbno;
+};
+
+/*
+ * Translate a realtime extent number into the byte position, within the
+ * bitmap, of the xfs_rtword_t that carries that extent's bit.
+ */
+static inline loff_t
+rtword_off(
+	xfs_rtblock_t		rt_ext)
+{
+	/* Index of the bitmap word that contains this extent's bit. */
+	xfs_rtblock_t		word_index = rt_ext >> XFS_NBWORDLOG;
+
+	return word_index * sizeof(xfs_rtword_t);
+}
+
+/*
+ * OR @mask into the incore bitmap word that covers @rt_ext: load the word
+ * from the xfile, merge in the new bits, and store it back at the same
+ * position.  Returns 0 or the error from the xfile load/store.
+ */
+static int
+xrep_rtbitmap_or(
+	struct xrep_rtbmp	*rb,
+	xfs_rtblock_t		rt_ext,
+	xfs_rtword_t		mask)
+{
+	struct xfs_scrub	*sc = rb->sc;
+	xfs_rtword_t		cur;
+	loff_t			word_pos = rtword_off(rt_ext);
+	int			ret;
+
+	ret = xfile_obj_load(sc->xfile, &cur, sizeof(cur), word_pos);
+	if (ret)
+		return ret;
+
+	/* Trace the word as loaded, before the new bits are merged in. */
+	trace_xrep_rtbitmap_or(sc->mp, rt_ext, word_pos, mask, cur);
+
+	cur |= mask;
+	return xfile_obj_store(sc->xfile, &cur, sizeof(cur), word_pos);
+}
+
+/*
+ * Mark a range free in the incore rtbitmap.
+ *
+ * The range is the rt blocks [rb->next_rtbno, next); freeing is recorded by
+ * setting the corresponding bits in the xfile copy of the bitmap.  Both ends
+ * must be multiples of the rt extent size.  This function does not advance
+ * rb->next_rtbno; the caller does that.
+ *
+ * Returns 0 on success, -EFSCORRUPTED if the range fails xfs_verify_rtext or
+ * is not rt-extent aligned, or the error from an xfile load/store.
+ */
+STATIC int
+xrep_rtbitmap_mark_free(
+ struct xrep_rtbmp *rb,
+ xfs_rtblock_t next)
+{
+ struct xfs_mount *mp = rb->sc->mp;
+ xfs_rtblock_t start_ext;
+ xfs_rtblock_t next_ext;
+ loff_t pos, endpos;
+ unsigned int bit;
+ unsigned int mod;
+ xfs_rtword_t mask;
+ int error;
+
+ if (!xfs_verify_rtext(mp, rb->next_rtbno, next - rb->next_rtbno))
+ return -EFSCORRUPTED;
+
+ /* Convert rt blocks to rt extents. */
+ start_ext = div_u64_rem(rb->next_rtbno, mp->m_sb.sb_rextsize, &mod);
+ if (mod)
+ return -EFSCORRUPTED;
+ next_ext = div_u64_rem(next, mp->m_sb.sb_rextsize, &mod);
+ if (mod)
+ return -EFSCORRUPTED;
+
+ /* The trace takes an inclusive last extent, hence the - 1. */
+ trace_xrep_rtbitmap_record_free(mp, start_ext, next_ext - 1);
+
+ /* Set bits as needed to round start_ext up to the nearest word. */
+ bit = start_ext & (XFS_NBWORD - 1);
+ if (bit) {
+ xfs_rtblock_t len = next_ext - start_ext;
+ unsigned int lastbit;
+
+ /* Stop at the end of the range or the end of this word. */
+ lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
+ mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+
+ error = xrep_rtbitmap_or(rb, start_ext, mask);
+ /* Done if the whole range fit inside this first word. */
+ if (error || lastbit - bit == len)
+ return error;
+ start_ext += XFS_NBWORD - bit;
+ }
+
+ /* Set bits as needed to round next_ext down to the nearest word. */
+ bit = next_ext & (XFS_NBWORD - 1);
+ if (bit) {
+ mask = ((xfs_rtword_t)1 << bit) - 1;
+
+ error = xrep_rtbitmap_or(rb, next_ext, mask);
+ /* Done if what remained lay entirely within this last word. */
+ if (error || start_ext + bit == next_ext)
+ return error;
+ next_ext -= bit;
+ }
+
+ trace_xrep_rtbitmap_record_free_bulk(mp, start_ext, next_ext - 1);
+
+ /*
+  * Set all the bytes in between, up to a whole fs block at once.
+  * NOTE(review): the shifts turn extent (bit) numbers into byte offsets,
+  * assuming XFS_NBBYLOG is log2 of the bits per byte -- confirm.
+  */
+ pos = start_ext >> XFS_NBBYLOG;
+ endpos = next_ext >> XFS_NBBYLOG;
+
+ while (pos < endpos) {
+ loff_t rem;
+ size_t count = min_t(loff_t, endpos - pos,
+ mp->m_sb.sb_blocksize);
+
+ /* Try to get us aligned to an even blocksize. */
+ rem = pos & (mp->m_sb.sb_blocksize - 1);
+ if (rem)
+ count = min_t(loff_t, count,
+ mp->m_sb.sb_blocksize - rem);
+
+ /*
+  * sc->buf is the source of the set bits; it is filled with 0xFF
+  * by xrep_rtbitmap_find_freespace before the rtrmapbt walk.
+  */
+ error = xfile_obj_store(rb->sc->xfile, rb->sc->buf, count, pos);
+ if (error)
+ return error;
+ pos += count;
+ }
+
+ return 0;
+}
+
+/*
+ * Set up to repair the realtime bitmap.
+ *
+ * Creates the temporary file, an xfile large enough to hold the whole
+ * reconstructed bitmap, and a one-fs-block memory buffer used for bulk
+ * fills, then reports through @resblks how many blocks the repair
+ * transaction should reserve.
+ *
+ * Returns 0 on success, the error from tempfile or xfile creation, -ENOMEM
+ * if the buffer cannot be allocated, or -EOPNOTSUPP if the reservation would
+ * not fit in an unsigned int.  @resblks is only written on success.
+ */
+int
+xrep_setup_rtbitmap(
+	struct xfs_scrub	*sc,
+	unsigned int		*resblks)
+{
+	struct xfs_mount	*mp = sc->mp;
+	unsigned long long	blocks;
+	loff_t			bmp_bytes;
+	int			error;
+
+	error = xrep_tempfile_create(sc, S_IFREG);
+	if (error)
+		return error;
+
+	/* Create an xfile to hold our reconstructed bitmap. */
+	bmp_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rbmblocks);
+	sc->xfile = xfile_create("rtbitmap", bmp_bytes);
+	if (IS_ERR(sc->xfile)) {
+		/*
+		 * Do not leave an error-encoded pointer in the scrub context:
+		 * it outlives this function, and any later cleanup that only
+		 * tests for NULL would otherwise try to use it.
+		 */
+		error = PTR_ERR(sc->xfile);
+		sc->xfile = NULL;
+		return error;
+	}
+
+	/*
+	 * Allocate a memory buffer for faster creation of new bitmap
+	 * blocks.
+	 */
+	sc->buf = kvmalloc(mp->m_sb.sb_blocksize,
+			GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
+	if (!sc->buf)
+		return -ENOMEM;
+
+	/*
+	 * Reserve enough blocks to write out a completely new bitmap file,
+	 * plus twice as many blocks as we would need if we can only allocate
+	 * one block per data fork mapping.  This should cover the
+	 * preallocation of the temporary file and swapping the extent
+	 * mappings.
+	 *
+	 * We cannot use xfs_swapext_estimate because we have not yet
+	 * constructed the replacement bitmap and therefore do not know how
+	 * many extents it will use.  By the time we do, we will have a dirty
+	 * transaction (which we cannot drop because we cannot drop the
+	 * rtbitmap ILOCK) and cannot ask for more reservation.
+	 */
+	blocks = mp->m_sb.sb_rbmblocks;
+	blocks += xfs_bmbt_calc_size(mp, blocks) * 2;
+	if (blocks > UINT_MAX)
+		return -EOPNOTSUPP;
+
+	*resblks = blocks;
+	return 0;
+}
+
+/*
+ * rtrmapbt record callback: if this record starts beyond the point the walk
+ * has reached, the gap in between is marked free in the incore bitmap.  The
+ * walk position then moves to the end of this record unless it is already
+ * further along.
+ */
+STATIC int
+xrep_rtbitmap_walk_rtrmap(
+	struct xfs_btree_cur		*cur,
+	const struct xfs_rmap_irec	*rec,
+	void				*priv)
+{
+	struct xrep_rtbmp		*rtb = priv;
+	int				ret = 0;
+
+	if (xchk_should_terminate(rtb->sc, &ret))
+		return ret;
+
+	/* A record starting past the expected block leaves a free gap. */
+	if (rec->rm_startblock > rtb->next_rtbno) {
+		ret = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
+		if (ret)
+			return ret;
+	}
+
+	/* Never move the walk position backwards. */
+	rtb->next_rtbno = max(rtb->next_rtbno,
+			      rec->rm_startblock + rec->rm_blockcount);
+	return 0;
+}
+
+/*
+ * Build the incore bitmap by walking every rtrmapbt record and marking the
+ * gaps between records, plus any space left between the last record and the
+ * end of the rt device.
+ */
+STATIC int
+xrep_rtbitmap_find_freespace(
+	struct xrep_rtbmp	*rb)
+{
+	struct xfs_scrub	*sc = rb->sc;
+	struct xfs_mount	*mp = sc->mp;
+	int			error;
+
+	/* An all-ones block lets the bulk path set a block's worth at once. */
+	memset(sc->buf, 0xFF, mp->m_sb.sb_blocksize);
+
+	xrep_rt_btcur_init(sc, &sc->sr);
+	error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
+			rb);
+	xchk_rt_btcur_free(&sc->sr);
+	if (error)
+		return error;
+
+	/* Nothing more to mark if the walk ended exactly at the device end. */
+	if (rb->next_rtbno == mp->m_sb.sb_rblocks)
+		return 0;
+
+	/* Mark all space to the end of the rt device free. */
+	return xrep_rtbitmap_mark_free(rb, mp->m_sb.sb_rblocks);
+}
+
+/*
+ * Repair the realtime bitmap: compute a new bitmap from the rtrmapbt, write
+ * it into the temporary file, and swap the temporary file's data fork
+ * extents with those of the file being repaired.
+ */
+int
+xrep_rtbitmap(
+	struct xfs_scrub	*sc)
+{
+	struct xfs_swapext_req	req = { .req_flags = 0 };
+	struct xrep_rtbmp	rb = { .sc = sc };
+	struct xfs_mount	*mp = sc->mp;
+	int			error;
+
+	/*
+	 * Without the realtime rmapbt (and atomic file updates) there is
+	 * nothing to rebuild from.
+	 */
+	if (!xfs_has_rtrmapbt(mp))
+		return -EOPNOTSUPP;
+
+	/* Compute the replacement bitmap contents in memory. */
+	error = xrep_rtbitmap_find_freespace(&rb);
+	if (error)
+		return error;
+
+	/* Fix any problems with the inode forks first. */
+	error = xrep_metadata_inode_forks(sc);
+	if (error)
+		return error;
+
+	/*
+	 * Only a trylock on the temporary file: nobody else should be
+	 * holding this inode, so give up with -EAGAIN if somebody is.
+	 */
+	if (!xrep_tempfile_ilock_nowait(sc, XFS_ILOCK_EXCL))
+		return -EAGAIN;
+
+	/* Back the whole bitmap range of the temporary file with blocks. */
+	xfs_trans_ijoin(sc->tp, sc->ip, 0);
+	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+	error = xrep_tempfile_prealloc(sc, 0, mp->m_sb.sb_rbmblocks);
+	if (error)
+		return error;
+
+	/* Write the generated bitmap into the temporary file. */
+	error = xrep_tempfile_copyin(sc, &xfs_rtbuf_ops,
+			XFS_BLFT_RTBITMAP_BUF,
+			XFS_FSB_TO_B(mp, mp->m_sb.sb_rbmblocks));
+	if (error)
+		return error;
+
+	/* Exchange the data fork mappings of the two files. */
+	req.ip1 = sc->tempip;
+	req.ip2 = sc->ip;
+	req.whichfork = XFS_DATA_FORK;
+	req.blockcount = mp->m_sb.sb_rbmblocks;
+	return xfs_swapext(&sc->tp, &req);
+}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 1617a7c613e7..186e198a330c 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -333,7 +333,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.setup = xchk_setup_rtbitmap,
.scrub = xchk_rtbitmap,
.has = xfs_has_realtime,
- .repair = xrep_notsupported,
+ .repair = xrep_rtbitmap,
},
[XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */
.type = ST_FS,
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index e9e5ca936e01..816011123321 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1749,6 +1749,64 @@ DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_dir_salvaged_parent);
DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_findparent_dirent);
DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_findparent_from_dcache);
+/*
+ * Event class for rtbitmap repair events that describe a run of rt extents.
+ * @start is the first extent of the run and @end is the last one, inclusive
+ * (callers pass "next extent - 1"), so it is printed as last_rtx rather than
+ * as a count.
+ */
+DECLARE_EVENT_CLASS(xrep_rtbitmap_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t start, xfs_rtblock_t end),
+	TP_ARGS(mp, start, end),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(dev_t, rtdev)
+		__field(xfs_rtblock_t, start)
+		__field(xfs_rtblock_t, end)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->rtdev = mp->m_rtdev_targp->bt_dev;
+		__entry->start = start;
+		__entry->end = end;
+	),
+	TP_printk("dev %d:%d rtdev %d:%d rtx 0x%llx last_rtx 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+		  __entry->start,
+		  __entry->end)
+);
+/* Define a named event in xrep_rtbitmap_class taking (mp, start, end). */
+#define DEFINE_REPAIR_RTBITMAP_EVENT(name) \
+DEFINE_EVENT(xrep_rtbitmap_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t start, \
+ xfs_rtblock_t end), \
+ TP_ARGS(mp, start, end))
+/* Emitted for each range being marked free, and for its bulk-set middle. */
+DEFINE_REPAIR_RTBITMAP_EVENT(xrep_rtbitmap_record_free);
+DEFINE_REPAIR_RTBITMAP_EVENT(xrep_rtbitmap_record_free_bulk);
+
+/*
+ * Trace one OR operation on a word of the incore rtbitmap: the rt extent
+ * being recorded (rtx), the byte position of its word (pos), the mask being
+ * merged in, and the word value supplied by the caller.
+ */
+TRACE_EVENT(xrep_rtbitmap_or,
+ TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t rtx, loff_t pos,
+ xfs_rtword_t mask, xfs_rtword_t word),
+ TP_ARGS(mp, rtx, pos, mask, word),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_rtblock_t, rtx)
+ __field(loff_t, pos)
+ __field(unsigned int, mask)
+ __field(unsigned int, word)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->rtx = rtx;
+ __entry->pos = pos;
+ __entry->mask = mask;
+ __entry->word = word;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d rtx 0x%llx rbmpos 0x%llx mask 0x%x word 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->rtx,
+ __entry->pos,
+ __entry->mask,
+ __entry->word)
+)
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
diff --git a/fs/xfs/scrub/xfblob.c b/fs/xfs/scrub/xfblob.c
index 7ecc840a1013..6eda335211e3 100644
--- a/fs/xfs/scrub/xfblob.c
+++ b/fs/xfs/scrub/xfblob.c
@@ -13,8 +13,8 @@
/*
* XFS Blob Storage
* ================
- * Stores and retrieves blobs using a memfd object. Objects are appended to
- * the file and the offset is returned as a magic cookie for retrieval.
+ * Stores and retrieves blobs using an xfile. Objects are appended to the file
+ * and the offset is returned as a magic cookie for retrieval.
*/
#define XB_KEY_MAGIC 0xABAADDAD