diff options
author | Darrick J. Wong <djwong@kernel.org> | 2021-09-01 10:59:04 -0700 |
---|---|---|
committer | Darrick J. Wong <djwong@kernel.org> | 2021-12-15 17:29:02 -0800 |
commit | a72e974d1b31e1d23f088e3f910d33094da2e174 (patch) | |
tree | 4530bc3b81887b7e1723d2c098fdd5982268c211 | |
parent | 4624f8c76088688180c5a5a455a89d47935fa570 (diff) |
xfs: online repair of realtime summaries (repair-rtsummary_2021-12-15)
Repair the realtime summary data by constructing a new rtsummary file in
the scrub temporary file, then atomically swapping the contents.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- | fs/xfs/Makefile | 4 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.c | 17 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.h | 14 | ||||
-rw-r--r-- | fs/xfs/scrub/rtsummary.c | 11 | ||||
-rw-r--r-- | fs/xfs/scrub/rtsummary_repair.c | 116 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.c | 2 | ||||
-rw-r--r-- | fs/xfs/scrub/tempfile.c | 247 | ||||
-rw-r--r-- | fs/xfs/scrub/tempfile.h | 14 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.h | 39 |
9 files changed, 461 insertions, 3 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 7ddbcfc2dd46..14b098bdd7fc 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -187,6 +187,10 @@ xfs-y += $(addprefix scrub/, \ tempfile.o \ ) +xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \ + rtsummary_repair.o \ + ) + xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \ quota_repair.o \ quotacheck_repair.o \ diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 59a3022ff267..247e3a971f46 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -42,6 +42,7 @@ #include "scrub/trace.h" #include "scrub/repair.h" #include "scrub/bitmap.h" +#include "scrub/xfile.h" /* * Attempt to repair some metadata, if the metadata is corrupt and userspace @@ -167,9 +168,23 @@ int xrep_roll_trans( struct xfs_scrub *sc) { + int error; + if (!sc->ip) return xrep_roll_ag_trans(sc); - return xfs_trans_roll_inode(&sc->tp, sc->ip); + + /* + * Roll the transaction with the inode we're fixing and the temp inode, + * so that neither can pin the log. + * + * XXX: does this really need to be in the rtsummary repair patch? 
+ */ + if (sc->tempip) + xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE); + error = xfs_trans_roll_inode(&sc->tp, sc->ip); + if (sc->tempip) + xfs_trans_ijoin(sc->tp, sc->tempip, 0); + return error; } /* diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index a83c6ad50153..198641f50505 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -76,6 +76,7 @@ int xrep_reset_perag_resv(struct xfs_scrub *sc); int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten); int xrep_metadata_inode_forks(struct xfs_scrub *sc); int xrep_setup_ag_rmapbt(struct xfs_scrub *sc); +int xrep_setup_rtsummary(struct xfs_scrub *sc, unsigned int *resblks); /* Repair setup functions */ int xrep_setup_ag_allocbt(struct xfs_scrub *sc); @@ -133,6 +134,12 @@ int xrep_quotacheck(struct xfs_scrub *sc); # define xrep_quotacheck xrep_notsupported #endif /* CONFIG_XFS_QUOTA */ +#ifdef CONFIG_XFS_RT +int xrep_rtsummary(struct xfs_scrub *sc); +#else +# define xrep_rtsummary xrep_notsupported +#endif /* CONFIG_XFS_RT */ + struct xrep_newbt_resv { /* Link to list of extents that we've reserved. 
*/ struct list_head list; @@ -235,6 +242,12 @@ xrep_setup_nothing( #define xrep_setup_ag_allocbt xrep_setup_nothing #define xrep_setup_ag_rmapbt xrep_setup_nothing +static inline int +xrep_setup_rtsummary(struct xfs_scrub *sc, unsigned int *whatever) +{ + return 0; +} + #define xrep_revalidate_allocbt (NULL) #define xrep_revalidate_iallocbt (NULL) @@ -254,6 +267,7 @@ xrep_setup_nothing( #define xrep_quota xrep_notsupported #define xrep_quotacheck xrep_notsupported #define xrep_fscounters xrep_notsupported +#define xrep_rtsummary xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index e401abba6b23..b8fb3923f7f7 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -20,6 +20,8 @@ #include "scrub/common.h" #include "scrub/trace.h" #include "scrub/xfile.h" +#include "scrub/repair.h" +#include "scrub/tempfile.h" /* * Realtime Summary @@ -37,8 +39,15 @@ xchk_setup_rtsummary( struct xfs_scrub *sc) { struct xfs_mount *mp = sc->mp; + unsigned int resblks = 0; int error; + if (xchk_could_repair(sc)) { + error = xrep_setup_rtsummary(sc, &resblks); + if (error) + return error; + } + /* * Create an xfile to construct a new rtsummary file. The xfile allows * us to avoid pinning kernel memory for this purpose. @@ -48,7 +57,7 @@ xchk_setup_rtsummary( if (error) return error; - error = xchk_trans_alloc(sc, 0); + error = xchk_trans_alloc(sc, resblks); if (error) return error; diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c new file mode 100644 index 000000000000..97807a9dcf1d --- /dev/null +++ b/fs/xfs/scrub/rtsummary_repair.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2021 Oracle. All Rights Reserved. + * Author: Darrick J. 
Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_btree.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_rtalloc.h" +#include "xfs_inode.h" +#include "xfs_bit.h" +#include "xfs_bmap.h" +#include "xfs_bmap_btree.h" +#include "xfs_swapext.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/tempfile.h" + +/* Set us up to repair the rtsummary file. */ +int +xrep_setup_rtsummary( + struct xfs_scrub *sc, + unsigned int *resblks) +{ + struct xfs_mount *mp = sc->mp; + unsigned long long blocks; + int error; + + error = xrep_tempfile_create(sc, S_IFREG); + if (error) + return error; + + /* + * If we're doing a repair, we reserve enough blocks to write out a + * completely new summary file, plus twice as many blocks as we would + * need if we can only allocate one block per data fork mapping. This + * should cover the preallocation of the temporary file and swapping + * the extent mappings. + * + * We cannot use xfs_swapext_estimate because we have not yet + * constructed the replacement rtsummary and therefore do not know how + * many extents it will use. By the time we do, we will have a dirty + * transaction (which we cannot drop because we cannot drop the + * rtsummary ILOCK) and cannot ask for more reservation. + */ + blocks = XFS_B_TO_FSB(mp, mp->m_rsumsize); + blocks += xfs_bmbt_calc_size(mp, blocks) * 2; + if (blocks > UINT_MAX) + return -EOPNOTSUPP; + + *resblks += blocks; + return 0; +} + +/* Repair the realtime summary. */ +int +xrep_rtsummary( + struct xfs_scrub *sc) +{ + struct xfs_swapext_req req; + int error; + + /* We require the rmapbt to rebuild anything. */ + if (!xfs_has_rmapbt(sc->mp)) + return -EOPNOTSUPP; + + /* Make sure any problems with the fork are fixed. 
*/ + error = xrep_metadata_inode_forks(sc); + if (error) + return error; + + /* + * Trylock the temporary file. We had better be the only ones holding + * onto this inode... + */ + if (!xrep_tempfile_ilock_nowait(sc, XFS_ILOCK_EXCL)) + return -EAGAIN; + + /* Make sure we have space allocated for the entire summary file. */ + xfs_trans_ijoin(sc->tp, sc->ip, 0); + xfs_trans_ijoin(sc->tp, sc->tempip, 0); + error = xrep_tempfile_prealloc(sc, 0, + XFS_B_TO_FSB(sc->mp, sc->mp->m_rsumsize)); + if (error) + return error; + + /* Last chance to abort before we start committing fixes. */ + if (xchk_should_terminate(sc, &error)) + return error; + + /* Copy the rtsummary file that we generated. */ + error = xrep_tempfile_copyin_xfile(sc, &xfs_rtbuf_ops, + XFS_BLFT_RTSUMMARY_BUF, sc->mp->m_rsumsize); + if (error) + return error; + + /* Now swap the extents. */ + error = xrep_tempfile_swapext_prep_request(sc, XFS_DATA_FORK, &req); + if (error) + return error; + + error = xrep_tempfile_swapext(sc, &req); + if (error) + return error; + + /* Stale old buffers and truncate the file. 
*/ + return xrep_reap_fork(sc, sc->tempip, XFS_DATA_FORK); +} diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 4a2f755aa7d0..666f91f55052 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -335,7 +335,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .setup = xchk_setup_rtsummary, .scrub = xchk_rtsummary, .has = xfs_has_realtime, - .repair = xrep_notsupported, + .repair = xrep_rtsummary, }, [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ .type = ST_FS, diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index b0d0350833b9..bf5d1bd61f07 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -14,14 +14,19 @@ #include "xfs_inode.h" #include "xfs_ialloc.h" #include "xfs_quota.h" +#include "xfs_bmap.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" #include "xfs_dir2.h" #include "xfs_xchgrange.h" +#include "xfs_swapext.h" +#include "xfs_defer.h" #include "scrub/scrub.h" #include "scrub/common.h" +#include "scrub/repair.h" #include "scrub/trace.h" #include "scrub/tempfile.h" +#include "scrub/xfile.h" /* * Create a temporary file for reconstructing metadata, with the intention of @@ -212,3 +217,245 @@ xrep_tempfile_rele( xfs_irele(sc->tempip); sc->tempip = NULL; } + +/* + * Make sure that the given range of the data fork of the temporary file is + * mapped to written blocks. The caller must ensure that both inodes are + * joined to the transaction. + */ +int +xrep_tempfile_prealloc( + struct xfs_scrub *sc, + xfs_fileoff_t off, + xfs_filblks_t len) +{ + xfs_fileoff_t end = off + len; + int error = 0; + + ASSERT(sc->tempip != NULL); + ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip)); + + while (off < len) { + struct xfs_bmbt_irec map; + int nmaps = 1; + + /* + * If we have a real extent mapping this block then we're + * in ok shape. 
+ */ + error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps, + XFS_DATA_FORK); + if (error) + break; + + if (nmaps == 1 && xfs_bmap_is_written_extent(&map)) { + off += map.br_startblock; + continue; + } + + /* + * If we find a delalloc reservation then something is very + * very wrong. Bail out. + */ + if (map.br_startblock == DELAYSTARTBLOCK) + return -EFSCORRUPTED; + + /* + * Make sure this block has a real zeroed extent allocated to + * it. + */ + nmaps = 1; + error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off, + XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map, + &nmaps); + if (error) + break; + + trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map); + + /* Commit new extent and all deferred work. */ + error = xfs_defer_finish(&sc->tp); + if (error) + break; + + off += map.br_startblock; + } + + return error; +} + +/* + * Write a number of bytes from the xfile into the temp file, one filesystem + * block at a time. The caller must join both inodes to the transaction. + */ +int +xrep_tempfile_copyin_xfile( + struct xfs_scrub *sc, + const struct xfs_buf_ops *ops, + enum xfs_blft type, + xfs_fileoff_t isize) +{ + LIST_HEAD(buffers_list); + struct xfs_mount *mp = sc->mp; + struct xfs_buf *bp; + xfs_fileoff_t flush_mask; + xfs_rtblock_t off = 0; + loff_t pos = 0; + int error = 0; + + ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode)); + + /* Flush buffers to disk every 512K */ + flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1; + + while (pos < isize) { + struct xfs_bmbt_irec map; + int nmaps = 1; + size_t count; + + /* Read block mapping for this file block. */ + error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0); + if (error) + goto out_err; + if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) { + error = -EFSCORRUPTED; + goto out_err; + } + + /* Get the metadata buffer for this offset in the file. 
*/ + error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, map.br_startblock), + mp->m_bsize, 0, &bp); + if (error) + goto out_err; + bp->b_ops = ops; + xfs_trans_buf_set_type(sc->tp, bp, type); + + /* Read in a block's worth of data from the xfile. */ + count = min_t(loff_t, isize - pos, mp->m_sb.sb_blocksize); + error = xfile_obj_load(sc->xfile, bp->b_addr, count, pos); + if (error) { + xfs_trans_brelse(sc->tp, bp); + goto out_err; + } + + trace_xrep_tempfile_copyin_xfile(sc, XFS_DATA_FORK, &map); + + /* Queue buffer, and flush if we have too much dirty data. */ + xfs_buf_delwri_queue_here(bp, &buffers_list); + xfs_trans_brelse(sc->tp, bp); + + if (!(off & flush_mask)) { + error = xfs_buf_delwri_submit(&buffers_list); + if (error) + goto out_err; + } + + pos += mp->m_sb.sb_blocksize; + off++; + } + + /* + * Write the new blocks to disk. If the ordered list isn't empty after + * that, then something went wrong and we have to fail. This should + * never happen, but we'll check anyway. + */ + error = xfs_buf_delwri_submit(&buffers_list); + if (error) + goto out_err; + + if (!list_empty(&buffers_list)) { + ASSERT(list_empty(&buffers_list)); + error = -EIO; + goto out_err; + } + + /* Set the new inode size, if needed. */ + if (sc->tempip->i_disk_size != isize) { + sc->tempip->i_disk_size = isize; + i_size_write(VFS_I(sc->tempip), isize); + xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE); + return xrep_roll_trans(sc); + } + + return 0; + +out_err: + xfs_buf_delwri_cancel(&buffers_list); + return error; +} + +/* + * Fill out the swapext request in preparation for swapping the contents of a + * metadata file that we've rebuilt in the temp file. + */ +int +xrep_tempfile_swapext_prep_request( + struct xfs_scrub *sc, + int whichfork, + struct xfs_swapext_req *req) +{ + /* COW forks don't exist on disk. */ + if (whichfork == XFS_COW_FORK) { + ASSERT(0); + return -EINVAL; + } + + /* Both files should have the relevant forks. 
*/ + if (!XFS_IFORK_PTR(sc->ip, whichfork) || + !XFS_IFORK_PTR(sc->tempip, whichfork)) { + ASSERT(0); + return -EINVAL; + } + + /* Swap all mappings in both forks. */ + req->ip1 = sc->tempip; + req->ip2 = sc->ip; + req->startoff1 = 0; + req->startoff2 = 0; + req->whichfork = whichfork; + req->blockcount = XFS_MAX_FILEOFF; + req->req_flags = 0; + + /* Always swap sizes when we're swapping data fork mappings. */ + if (whichfork == XFS_DATA_FORK) + req->req_flags |= XFS_SWAP_REQ_SET_SIZES; + + /* + * If we're repairing xattrs or directories, always try to convert ip2 + * to short format after swapping. + */ + if (whichfork == XFS_ATTR_FORK || S_ISDIR(VFS_I(sc->ip)->i_mode)) + req->req_flags |= XFS_SWAP_REQ_FILE2_CVT_SF; + + return 0; +} + +/* Swap forks between the file being repaired and the temporary file. */ +int +xrep_tempfile_swapext( + struct xfs_scrub *sc, + struct xfs_swapext_req *req) +{ + int error; + + error = xfs_swapext(&sc->tp, req); + if (error) + return error; + + /* + * If we swapped the ondisk sizes of two metadata files, we must swap + * the incore sizes as well. Since online fsck doesn't use swapext on + * the data forks of user-accessible files, the two sizes are always + * the same, so we don't need to log the inodes. 
+ */ + if (req->req_flags & XFS_SWAP_REQ_SET_SIZES) { + loff_t temp; + + temp = i_size_read(VFS_I(sc->ip)); + i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip))); + i_size_write(VFS_I(sc->tempip), temp); + } + + return 0; +} diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h index a6a4c8d6a373..2b9a9e6fa9d9 100644 --- a/fs/xfs/scrub/tempfile.h +++ b/fs/xfs/scrub/tempfile.h @@ -13,6 +13,20 @@ void xrep_tempfile_rele(struct xfs_scrub *sc); void xrep_tempfile_ilock(struct xfs_scrub *sc, unsigned int ilock_flags); bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc, unsigned int ilock_flags); void xrep_tempfile_iunlock(struct xfs_scrub *sc, unsigned int ilock_flags); + +int xrep_tempfile_prealloc(struct xfs_scrub *sc, xfs_fileoff_t off, + xfs_filblks_t len); + +enum xfs_blft; + +int xrep_tempfile_copyin_xfile(struct xfs_scrub *sc, + const struct xfs_buf_ops *ops, enum xfs_blft type, + xfs_fileoff_t isize); + +struct xfs_swapext_req; +int xrep_tempfile_swapext_prep_request(struct xfs_scrub *sc, int whichfork, + struct xfs_swapext_req *req); +int xrep_tempfile_swapext(struct xfs_scrub *sc, struct xfs_swapext_req *req); #else # define xrep_tempfile_rele(sc) #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 932e5e900410..b94c209e4bfc 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -1527,6 +1527,45 @@ TRACE_EVENT(xrep_tempfile_create, __entry->temp_inum) ); +DECLARE_EVENT_CLASS(xrep_tempfile_class, + TP_PROTO(struct xfs_scrub *sc, int whichfork, + struct xfs_bmbt_irec *irec), + TP_ARGS(sc, whichfork, irec), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, whichfork) + __field(xfs_fileoff_t, lblk) + __field(xfs_filblks_t, len) + __field(xfs_fsblock_t, pblk) + __field(int, state) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->ino = sc->tempip->i_ino; + __entry->whichfork = whichfork; + __entry->lblk = irec->br_startoff; + 
__entry->len = irec->br_blockcount; + __entry->pblk = irec->br_startblock; + __entry->state = irec->br_state; + ), + TP_printk("dev %d:%d ino 0x%llx whichfork %s fileoff 0x%llx fsbcount 0x%llx startblock 0x%llx state %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), + __entry->lblk, + __entry->len, + __entry->pblk, + __entry->state) +); +#define DEFINE_XREP_TEMPFILE_EVENT(name) \ +DEFINE_EVENT(xrep_tempfile_class, name, \ + TP_PROTO(struct xfs_scrub *sc, int whichfork, \ + struct xfs_bmbt_irec *irec), \ + TP_ARGS(sc, whichfork, irec)) +DEFINE_XREP_TEMPFILE_EVENT(xrep_tempfile_prealloc); +DEFINE_XREP_TEMPFILE_EVENT(xrep_tempfile_copyin_xfile); + TRACE_EVENT(xrep_bmapi_reap_extent, TP_PROTO(struct xfs_scrub *sc, struct xfs_inode *ip, int whichfork, const struct xfs_bmbt_irec *irec), |