diff options
author | Darrick J. Wong <djwong@kernel.org> | 2021-09-01 11:18:00 -0700 |
---|---|---|
committer | Darrick J. Wong <djwong@kernel.org> | 2021-10-22 16:41:04 -0700 |
commit | c923dcdabb08a1a84c3b76f263696b3c210434b9 (patch) | |
tree | b4de9887c1f6e6027b3ab90a9735bdbefd1a8d99 | |
parent | b65c5d4c0bc77a39c44f6045bf6bfb94b8fc7468 (diff) |
xfs: online repair of realtime bitmaps
Rebuild the realtime bitmap from the realtime rmap btree.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- | fs/xfs/Makefile | 1 |
-rw-r--r-- | fs/xfs/scrub/repair.h | 6 |
-rw-r--r-- | fs/xfs/scrub/rtbitmap.c | 12 |
-rw-r--r-- | fs/xfs/scrub/rtbitmap_repair.c | 305 |
-rw-r--r-- | fs/xfs/scrub/scrub.c | 2 |
-rw-r--r-- | fs/xfs/scrub/trace.h | 58 |
-rw-r--r-- | fs/xfs/scrub/xfblob.c | 4 |
7 files changed, 384 insertions, 4 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index e340ea4f281f..d63dbf228773 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -197,6 +197,7 @@ xfs-y += $(addprefix scrub/, \ ) xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \ + rtbitmap_repair.o \ rtsummary_repair.o \ ) diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index fe095d0653da..9869f4f65294 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -79,6 +79,7 @@ int xrep_setup_rtsummary(struct xfs_scrub *sc, unsigned int *resblks); int xrep_setup_xattr(struct xfs_scrub *sc); int xrep_setup_directory(struct xfs_scrub *sc); int xrep_setup_parent(struct xfs_scrub *sc); +int xrep_setup_rtbitmap(struct xfs_scrub *sc, unsigned int *resblks); int xrep_xattr_reset_fork(struct xfs_scrub *sc, struct xfs_inode *ip); @@ -149,8 +150,10 @@ int xrep_quotacheck(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xrep_rtsummary(struct xfs_scrub *sc); +int xrep_rtbitmap(struct xfs_scrub *sc); #else # define xrep_rtsummary xrep_notsupported +# define xrep_rtbitmap xrep_notsupported #endif /* CONFIG_XFS_RT */ struct xrep_newbt_resv { @@ -265,6 +268,8 @@ xrep_setup_rtsummary(struct xfs_scrub *sc, unsigned int *resblks) return 0; } +#define xrep_setup_rtbitmap xrep_setup_rtsummary + static inline int xrep_setup_xattr( struct xfs_scrub *sc) @@ -298,6 +303,7 @@ xrep_setup_xattr( #define xrep_xattr xrep_notsupported #define xrep_directory xrep_notsupported #define xrep_parent xrep_notsupported +#define xrep_rtbitmap xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index b7a5d0cffb0f..77a50d1dafb3 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -20,15 +20,25 @@ #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" +#include "scrub/xfile.h" +#include "scrub/repair.h" +#include "scrub/tempfile.h" /* Set us up with the realtime metadata locked. 
*/ int xchk_setup_rtbitmap( struct xfs_scrub *sc) { + unsigned int resblks = 0; int error; - error = xchk_trans_alloc(sc, 0); + if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) { + error = xrep_setup_rtbitmap(sc, &resblks); + if (error) + return error; + } + + error = xchk_trans_alloc(sc, resblks); if (error) return error; diff --git a/fs/xfs/scrub/rtbitmap_repair.c b/fs/xfs/scrub/rtbitmap_repair.c new file mode 100644 index 000000000000..494460e81acc --- /dev/null +++ b/fs/xfs/scrub/rtbitmap_repair.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2021 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_btree.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_rtalloc.h" +#include "xfs_inode.h" +#include "xfs_bit.h" +#include "xfs_bmap.h" +#include "xfs_bmap_btree.h" +#include "xfs_rmap.h" +#include "xfs_rtrmap_btree.h" +#include "xfs_swapext.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/xfile.h" +#include "scrub/tempfile.h" + +struct xrep_rtbmp { + struct xfs_scrub *sc; + + /* The next rt block we expect to see during our rtrmapbt walk. */ + xfs_rtblock_t next_rtbno; +}; + +/* + * Compute the byte offset of the xfs_rtword_t corresponding to the given rt + * extent's location in the bitmap. + */ +static inline loff_t +rtword_off( + xfs_rtblock_t rt_ext) +{ + return (rt_ext >> XFS_NBWORDLOG) * sizeof(xfs_rtword_t); +} + +/* Perform a logical OR operation on an rtword in the incore bitmap. 
*/ +static int +xrep_rtbitmap_or( + struct xrep_rtbmp *rb, + xfs_rtblock_t rt_ext, + xfs_rtword_t mask) +{ + loff_t pos = rtword_off(rt_ext); + xfs_rtword_t word; + int error; + + error = xfile_obj_load(rb->sc->xfile, &word, sizeof(word), pos); + if (error) + return error; + + trace_xrep_rtbitmap_or(rb->sc->mp, rt_ext, pos, mask, word); + + word |= mask; + return xfile_obj_store(rb->sc->xfile, &word, sizeof(word), pos); +} + +/* Mark a range free in the incore rtbitmap. */ +STATIC int +xrep_rtbitmap_mark_free( + struct xrep_rtbmp *rb, + xfs_rtblock_t next) +{ + struct xfs_mount *mp = rb->sc->mp; + xfs_rtblock_t start_ext; + xfs_rtblock_t next_ext; + loff_t pos, endpos; + unsigned int bit; + unsigned int mod; + xfs_rtword_t mask; + int error; + + if (!xfs_verify_rtext(mp, rb->next_rtbno, next - rb->next_rtbno)) + return -EFSCORRUPTED; + + /* Convert rt blocks to rt extents. */ + start_ext = div_u64_rem(rb->next_rtbno, mp->m_sb.sb_rextsize, &mod); + if (mod) + return -EFSCORRUPTED; + next_ext = div_u64_rem(next, mp->m_sb.sb_rextsize, &mod); + if (mod) + return -EFSCORRUPTED; + + trace_xrep_rtbitmap_record_free(mp, start_ext, next_ext - 1); + + /* Set bits as needed to round start_ext up to the nearest word. */ + bit = start_ext & (XFS_NBWORD - 1); + if (bit) { + xfs_rtblock_t len = next_ext - start_ext; + unsigned int lastbit; + + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; + + error = xrep_rtbitmap_or(rb, start_ext, mask); + if (error || lastbit - bit == len) + return error; + start_ext += XFS_NBWORD - bit; + } + + /* Set bits as needed to round next_ext down to the nearest word. 
*/ + bit = next_ext & (XFS_NBWORD - 1); + if (bit) { + mask = ((xfs_rtword_t)1 << bit) - 1; + + error = xrep_rtbitmap_or(rb, next_ext, mask); + if (error || start_ext + bit == next_ext) + return error; + next_ext -= bit; + } + + trace_xrep_rtbitmap_record_free_bulk(mp, start_ext, next_ext - 1); + + /* Set all the bytes in between, up to a whole fs block at once. */ + pos = start_ext >> XFS_NBBYLOG; + endpos = next_ext >> XFS_NBBYLOG; + + while (pos < endpos) { + loff_t rem; + size_t count = min_t(loff_t, endpos - pos, + mp->m_sb.sb_blocksize); + + /* Try to get us aligned to an even blocksize. */ + rem = pos & (mp->m_sb.sb_blocksize - 1); + if (rem) + count = min_t(loff_t, count, + mp->m_sb.sb_blocksize - rem); + + error = xfile_obj_store(rb->sc->xfile, rb->sc->buf, count, pos); + if (error) + return error; + pos += count; + } + + return 0; +} + +/* Set up to repair the realtime bitmap. */ +int +xrep_setup_rtbitmap( + struct xfs_scrub *sc, + unsigned int *resblks) +{ + struct xfs_mount *mp = sc->mp; + unsigned long long blocks = 0; + loff_t bmp_bytes; + int error; + + error = xrep_tempfile_create(sc, S_IFREG); + if (error) + return error; + + /* Create an xfile to hold our reconstructed bitmap. */ + bmp_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rbmblocks); + sc->xfile = xfile_create("rtbitmap", bmp_bytes); + if (IS_ERR(sc->xfile)) + return PTR_ERR(sc->xfile); + + /* + * Allocate a memory buffer for faster creation of new bitmap + * blocks. + */ + sc->buf = kvmalloc(mp->m_sb.sb_blocksize, + GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL); + if (!sc->buf) + return -ENOMEM; + + /* + * Reserve enough blocks to write out a completely new bitmap file, + * plus twice as many blocks as we would need if we can only allocate + * one block per data fork mapping. This should cover the + * preallocation of the temporary file and swapping the extent + * mappings. 
+ * + * We cannot use xfs_swapext_estimate because we have not yet + * constructed the replacement bitmap and therefore do not know how + * many extents it will use. By the time we do, we will have a dirty + * transaction (which we cannot drop because we cannot drop the + * rtbitmap ILOCK) and cannot ask for more reservation. + */ + blocks = mp->m_sb.sb_rbmblocks; + blocks += xfs_bmbt_calc_size(mp, blocks) * 2; + if (blocks > UINT_MAX) + return -EOPNOTSUPP; + + *resblks = blocks; + return 0; +} + +/* Set free space in the rtbitmap based on rtrmapbt records. */ +STATIC int +xrep_rtbitmap_walk_rtrmap( + struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *rec, + void *priv) +{ + struct xrep_rtbmp *rb = priv; + int error = 0; + + if (xchk_should_terminate(rb->sc, &error)) + return error; + + if (rb->next_rtbno < rec->rm_startblock) { + error = xrep_rtbitmap_mark_free(rb, rec->rm_startblock); + if (error) + return error; + } + + rb->next_rtbno = max(rb->next_rtbno, + rec->rm_startblock + rec->rm_blockcount); + return 0; +} + +/* + * Walk the rtrmapbt to find all the gaps between records, and mark the gaps + * in the realtime bitmap that we're computing. + */ +STATIC int +xrep_rtbitmap_find_freespace( + struct xrep_rtbmp *rb) +{ + struct xfs_scrub *sc = rb->sc; + int error; + + /* Prepare a buffer of ones so that we can accelerate bulk setting. */ + memset(sc->buf, 0xFF, sc->mp->m_sb.sb_blocksize); + + xrep_rt_btcur_init(sc, &sc->sr); + error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap, + rb); + xchk_rt_btcur_free(&sc->sr); + if (error || sc->mp->m_sb.sb_rblocks == rb->next_rtbno) + return error; + + /* Mark all space to the end of the rt device free. */ + return xrep_rtbitmap_mark_free(rb, sc->mp->m_sb.sb_rblocks); +} + +/* Repair the realtime bitmap. 
*/ +int +xrep_rtbitmap( + struct xfs_scrub *sc) +{ + struct xrep_rtbmp rb = { + .sc = sc, + }; + struct xfs_swapext_req req = { .req_flags = 0 }; + int error; + + /* + * We require the realtime rmapbt (and atomic file updates) to rebuild + * anything. + */ + if (!xfs_has_rtrmapbt(sc->mp)) + return -EOPNOTSUPP; + + /* Generate the new rtbitmap data. */ + error = xrep_rtbitmap_find_freespace(&rb); + if (error) + return error; + + /* Make sure any problems with the fork are fixed. */ + error = xrep_metadata_inode_forks(sc); + if (error) + return error; + + /* + * Trylock the temporary file. We had better be the only ones holding + * onto this inode... + */ + if (!xrep_tempfile_ilock_nowait(sc, XFS_ILOCK_EXCL)) + return -EAGAIN; + + /* Make sure we have space allocated for the entire bitmap file. */ + xfs_trans_ijoin(sc->tp, sc->ip, 0); + xfs_trans_ijoin(sc->tp, sc->tempip, 0); + error = xrep_tempfile_prealloc(sc, 0, sc->mp->m_sb.sb_rbmblocks); + if (error) + return error; + + /* Copy the bitmap file that we generated. */ + error = xrep_tempfile_copyin(sc, &xfs_rtbuf_ops, + XFS_BLFT_RTBITMAP_BUF, + XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks)); + if (error) + return error; + + /* Now swap the extents. 
*/ + req.ip1 = sc->tempip; + req.ip2 = sc->ip; + req.whichfork = XFS_DATA_FORK; + req.blockcount = sc->mp->m_sb.sb_rbmblocks; + return xfs_swapext(&sc->tp, &req); +} diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 1617a7c613e7..186e198a330c 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -333,7 +333,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .setup = xchk_setup_rtbitmap, .scrub = xchk_rtbitmap, .has = xfs_has_realtime, - .repair = xrep_notsupported, + .repair = xrep_rtbitmap, }, [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ .type = ST_FS, diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index e9e5ca936e01..816011123321 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -1749,6 +1749,64 @@ DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_dir_salvaged_parent); DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_findparent_dirent); DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_findparent_from_dcache); +DECLARE_EVENT_CLASS(xrep_rtbitmap_class, + TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t start, xfs_rtblock_t end), + TP_ARGS(mp, start, end), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, rtdev) + __field(xfs_rtblock_t, start) + __field(xfs_rtblock_t, end) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->rtdev = mp->m_rtdev_targp->bt_dev; + __entry->start = start; + __entry->end = end; + ), + TP_printk("dev %d:%d rtdev %d:%d rtx 0x%llx rtxcount 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->rtdev), MINOR(__entry->rtdev), + __entry->start, + __entry->end) +); +#define DEFINE_REPAIR_RTBITMAP_EVENT(name) \ +DEFINE_EVENT(xrep_rtbitmap_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t start, \ + xfs_rtblock_t end), \ + TP_ARGS(mp, start, end)) +DEFINE_REPAIR_RTBITMAP_EVENT(xrep_rtbitmap_record_free); +DEFINE_REPAIR_RTBITMAP_EVENT(xrep_rtbitmap_record_free_bulk); + +TRACE_EVENT(xrep_rtbitmap_or, + TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t rtx, loff_t pos, + 
xfs_rtword_t mask, xfs_rtword_t word), + TP_ARGS(mp, rtx, pos, mask, word), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, rtdev) + __field(xfs_rtblock_t, rtx) + __field(loff_t, pos) + __field(unsigned int, mask) + __field(unsigned int, word) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->rtdev = mp->m_rtdev_targp->bt_dev; + __entry->rtx = rtx; + __entry->pos = pos; + __entry->mask = mask; + __entry->word = word; + ), + TP_printk("dev %d:%d rtdev %d:%d rtx 0x%llx rbmpos 0x%llx mask 0x%x word 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->rtdev), MINOR(__entry->rtdev), + __entry->rtx, + __entry->pos, + __entry->mask, + __entry->word) +) + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ #endif /* _TRACE_XFS_SCRUB_TRACE_H */ diff --git a/fs/xfs/scrub/xfblob.c b/fs/xfs/scrub/xfblob.c index 7ecc840a1013..6eda335211e3 100644 --- a/fs/xfs/scrub/xfblob.c +++ b/fs/xfs/scrub/xfblob.c @@ -13,8 +13,8 @@ /* * XFS Blob Storage * ================ - * Stores and retrieves blobs using a memfd object. Objects are appended to - * the file and the offset is returned as a magic cookie for retrieval. + * Stores and retrieves blobs using an xfile. Objects are appended to the file + * and the offset is returned as a magic cookie for retrieval. */ #define XB_KEY_MAGIC 0xABAADDAD |