diff options
author | Darrick J. Wong <darrick.wong@oracle.com> | 2020-03-12 14:49:02 -0700 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2020-06-24 18:12:20 -0700 |
commit | b0f907567458aeb3f3776ebced291c0bacbf9750 (patch) | |
tree | 21fa216b2108c1a23bf85e0687a57c7e653090fc | |
parent | b4a8e309923630d5b42c0bee70e6dfd066ba862b (diff) |
xfs: online repair of realtime summaries (tag: repair-fsfile-metadata_2020-06-24)
Repair the realtime summary file online.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- | fs/xfs/Makefile | 4 | ||||
-rw-r--r-- | fs/xfs/scrub/attr_repair.c | 2 | ||||
-rw-r--r-- | fs/xfs/scrub/common.h | 8 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.c | 256 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.h | 19 | ||||
-rw-r--r-- | fs/xfs/scrub/rtsummary.c | 13 | ||||
-rw-r--r-- | fs/xfs/scrub/rtsummary_repair.c | 45 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.c | 7 |
8 files changed, 347 insertions, 7 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 52c9cf0eaecd..c457366d5060 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -179,5 +179,9 @@ xfs-y				+= $(addprefix scrub/, \
 				   repair.o \
 				   symlink_repair.o \
 				   )
+
+xfs-$(CONFIG_XFS_RT)		+= $(addprefix scrub/, \
+				   rtsummary_repair.o \
+				   )
 endif
 endif
diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c
index 58e376397168..8967e8d960ff 100644
--- a/fs/xfs/scrub/attr_repair.c
+++ b/fs/xfs/scrub/attr_repair.c
@@ -582,7 +582,7 @@ xrep_xattr_stale_block(
  * join the inode to the transaction.  This function returns with the inode
  * joined to a clean scrub transaction.
  */
-STATIC int
+int
 xrep_xattr_reset_fork(
 	struct xfs_scrub	*sc)
 {
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 3324c88f6a08..0a79ea44315c 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -155,4 +155,12 @@ int xchk_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
 void xchk_stop_reaping(struct xfs_scrub *sc);
 void xchk_start_reaping(struct xfs_scrub *sc);
 
+/* Do we need to invoke the repair tool? */
+static inline bool xfs_scrub_needs_repair(struct xfs_scrub_metadata *sm)
+{
+	return sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+			       XFS_SCRUB_OFLAG_XCORRUPT |
+			       XFS_SCRUB_OFLAG_PREEN);
+}
+
 #endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 1f3238bb4eea..a04ee90504e9 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -28,11 +28,14 @@
 #include "xfs_bmap.h"
 #include "xfs_defer.h"
 #include "xfs_extfree_item.h"
+#include "xfs_attr.h"
+#include "xfs_reflink.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
 #include "scrub/repair.h"
 #include "scrub/bitmap.h"
+#include "scrub/xfile.h"
 
 /*
  * Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -1415,3 +1418,256 @@ xrep_buf_verify_struct(
 
 	return fa == NULL;
 }
+
+/*
+ * Repair the attr/data forks of a metadata inode.  The caller must ensure that
+ * sc->ip points to the metadata inode and the ILOCK is held on that inode.
+ * The inode must not be joined to the transaction before the call, and will
+ * not be afterwards.
+ */
+int
+xrep_metadata_inode_forks(
+	struct xfs_scrub	*sc)
+{
+	__u32			smtype;
+	__u32			smflags;
+	bool			dirty = false;
+	int			error;
+
+	/* Clear the reflink flag since metadata never shares. */
+	if (xfs_is_reflink_inode(sc->ip)) {
+		dirty = true;
+		xfs_trans_ijoin(sc->tp, sc->ip, 0);
+		error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
+		if (error)
+			return error;
+	}
+
+	/* Clear the attr forks since metadata shouldn't have that. */
+	if (xfs_inode_hasattr(sc->ip)) {
+		if (!dirty) {
+			dirty = true;
+			xfs_trans_ijoin(sc->tp, sc->ip, 0);
+		}
+		error = xrep_xattr_reset_fork(sc);
+		if (error)
+			return error;
+	}
+
+	/*
+	 * If we modified the inode, roll the transaction but don't rejoin the
+	 * inode to the new transaction because xrep_bmap_data can do that.
+	 */
+	if (dirty) {
+		error = xfs_trans_roll(&sc->tp);
+		if (error)
+			return error;
+		dirty = false;
+	}
+
+	/*
+	 * Let's see if the forks need repair.  We're going to open-code calls
+	 * to the bmapbtd scrub and repair functions so that we can hang on to
+	 * the resources that we already acquired instead of using the standard
+	 * setup/teardown routines.
+	 */
+	smtype = sc->sm->sm_type;
+	smflags = sc->sm->sm_flags;
+	sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
+	sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+
+	error = xchk_metadata_inode_forks(sc);
+	if (error || !xfs_scrub_needs_repair(sc->sm))
+		goto out;
+
+	/*
+	 * Repair the data fork.  This will potentially join the inode to the
+	 * transaction.
+	 */
+	error = xrep_bmap_data(sc);
+	if (error)
+		goto out;
+
+	/*
+	 * Roll the transaction but don't rejoin the inode to the new
+	 * transaction because we're done making changes to the inode.
+	 */
+	error = xfs_trans_roll(&sc->tp);
+	if (error)
+		goto out;
+
+	/* Bail out if we still need repairs. */
+	sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+	error = xchk_metadata_inode_forks(sc);
+	if (error)
+		goto out;
+	if (xfs_scrub_needs_repair(sc->sm))
+		error = -EFSCORRUPTED;
+out:
+	sc->sm->sm_type = smtype;
+	sc->sm->sm_flags = smflags;
+	return error;
+}
+
+/*
+ * Make sure that the given range of the data fork of the metadata file being
+ * checked is mapped to written blocks.  The caller must ensure that the inode
+ * is joined to the transaction.
+ */
+int
+xrep_fallocate(
+	struct xfs_scrub	*sc,
+	xfs_fileoff_t		off,
+	xfs_filblks_t		len)
+{
+	struct xfs_bmbt_irec	map;
+	xfs_fileoff_t		end = off + len;
+	int			nmaps;
+	int			error = 0;
+
+	error = xrep_ino_dqattach(sc);
+	if (error)
+		return error;
+
+	while (off < end) {
+		/*
+		 * If we have a real extent mapping this block then we're
+		 * in ok shape.
+		 */
+		nmaps = 1;
+		error = xfs_bmapi_read(sc->ip, off, end - off, &map, &nmaps,
+				XFS_DATA_FORK);
+		if (error)
+			break;
+
+		if (nmaps == 1 && xfs_bmap_is_written_extent(&map)) {
+			off = map.br_startoff + map.br_blockcount;
+			continue;
+		}
+
+		/*
+		 * If we find a delalloc reservation then something is very
+		 * very wrong.  Bail out.
+		 */
+		if (map.br_startblock == DELAYSTARTBLOCK)
+			return -EFSCORRUPTED;
+
+		/*
+		 * Make sure this file range has a real zeroed extent
+		 * allocated to it.
+		 */
+		nmaps = 1;
+		error = xfs_bmapi_write(sc->tp, sc->ip, off, end - off,
+				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
+				&nmaps);
+		if (error)
+			break;
+
+		error = xrep_roll_trans(sc);
+		if (error)
+			break;
+		off = map.br_startoff + map.br_blockcount;
+	}
+
+	return error;
+}
+
+/*
+ * Write @isize bytes from the xfile into the metadata file being examined,
+ * one filesystem block at a time.  The file range must already be mapped to
+ * written extents, and the caller must join the inode to the transaction.
+ */
+int
+xrep_set_file_contents(
+	struct xfs_scrub	*sc,
+	const struct xfs_buf_ops	*ops,
+	enum xfs_blft		type,
+	xfs_fileoff_t		isize)
+{
+	LIST_HEAD(buffers_list);
+	struct xfs_bmbt_irec	map;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_buf		*bp;
+	xfs_fileoff_t		off = 0;
+	loff_t			pos = 0;
+	unsigned int		nr_buffers = 0;
+	int			nmaps;
+	int			error = 0;
+
+	ASSERT(S_ISREG(VFS_I(sc->ip)->i_mode));
+
+	for (; pos < isize; pos += mp->m_sb.sb_blocksize, off++) {
+		size_t		count;
+
+		/* Read block mapping for this file block. */
+		nmaps = 1;
+		error = xfs_bmapi_read(sc->ip, off, 1, &map, &nmaps, 0);
+		if (error)
+			goto out;
+		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
+			error = -EFSCORRUPTED;
+			goto out;
+		}
+
+		/* Get the metadata buffer for this offset in the file. */
+		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
+				XFS_FSB_TO_DADDR(mp, map.br_startblock),
+				mp->m_bsize, 0, &bp);
+		if (error)
+			goto out;
+		bp->b_ops = ops;
+		xfs_trans_buf_set_type(sc->tp, bp, type);
+
+		/* Read in a block's worth of data from the xfile. */
+		count = min_t(loff_t, isize - pos, mp->m_sb.sb_blocksize);
+		error = xfile_pread(sc->xfile, bp->b_addr, count, pos);
+		if (error) {
+			xfs_trans_brelse(sc->tp, bp);
+			goto out;
+		}
+
+		/*
+		 * Put this buffer on the delwri list so we can write them all
+		 * out in batches.
+		 */
+		xfs_buf_delwri_queue(bp, &buffers_list);
+		xfs_trans_brelse(sc->tp, bp);
+		nr_buffers++;
+
+		/*
+		 * If we have more than 256K of data to write out, flush it to
+		 * disk so we don't use up too much memory.
+		 */
+		if (XFS_FSB_TO_B(mp, nr_buffers) > 262144) {
+			error = xfs_buf_delwri_submit(&buffers_list);
+			if (error)
+				goto out;
+			nr_buffers = 0;
+		}
+	}
+
+	/*
+	 * Write the new blocks to disk.  If the ordered list isn't empty after
+	 * that, then something went wrong and we have to fail.  This should
+	 * never happen, but we'll check anyway.
+	 */
+	error = xfs_buf_delwri_submit(&buffers_list);
+	if (error)
+		goto out;
+	if (!list_empty(&buffers_list)) {
+		ASSERT(list_empty(&buffers_list));
+		return -EIO;
+	}
+
+	/* Set the new inode size, if needed. */
+	if (sc->ip->i_d.di_size != isize) {
+		sc->ip->i_d.di_size = isize;
+		xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+		return xrep_roll_trans(sc);
+	}
+
+	return 0;
+out:
+	xfs_buf_delwri_cancel(&buffers_list);
+	return error;
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 58caaa07843b..207b337d6c29 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -20,6 +20,9 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)
 
 /* Repair helpers */
 
+enum xfs_blft;
+struct xbitmap;
+
 int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub *sc);
 void xrep_failure(struct xfs_mount *mp);
 int xrep_roll_ag_trans(struct xfs_scrub *sc);
@@ -33,8 +36,13 @@ int xrep_alloc_ag_block(struct xfs_scrub *sc,
 int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
 		struct xfs_buf **bpp, xfs_btnum_t btnum,
 		const struct xfs_buf_ops *ops);
+int xrep_fallocate(struct xfs_scrub *sc, xfs_fileoff_t off, xfs_filblks_t len);
 
-struct xbitmap;
+typedef int (*xrep_setfile_getbuf_fn)(struct xfs_scrub *sc,
+		xfs_fileoff_t off, struct xfs_buf **bpp);
+int xrep_set_file_contents(struct xfs_scrub *sc,
+		const struct xfs_buf_ops *ops, enum xfs_blft type,
+		xfs_fileoff_t isize);
 
 int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);
 int xrep_reap_extents(struct xfs_scrub *sc, struct xbitmap *exlist,
@@ -57,6 +65,8 @@ int xrep_find_ag_btree_roots(struct xfs_scrub *sc, struct xfs_buf *agf_bp,
 void xrep_force_quotacheck(struct xfs_scrub *sc, uint dqtype);
 int xrep_ino_dqattach(struct xfs_scrub *sc);
 int xrep_reset_perag_resv(struct xfs_scrub *sc);
+int xrep_xattr_reset_fork(struct xfs_scrub *sc);
+int xrep_metadata_inode_forks(struct xfs_scrub *sc);
 
 /* Metadata revalidators */
 
@@ -80,6 +90,12 @@ int xrep_symlink(struct xfs_scrub *sc);
 int xrep_dir(struct xfs_scrub *sc);
 int xrep_xattr(struct xfs_scrub *sc);
 
+#ifdef CONFIG_XFS_RT
+int xrep_rtsummary(struct xfs_scrub *sc);
+#else
+# define xrep_rtsummary		xrep_notsupported
+#endif /* CONFIG_XFS_RT */
+
 struct xrep_newbt_resv {
 	/* Link to list of extents that we've reserved. */
 	struct list_head	list;
@@ -185,6 +201,7 @@ xrep_reset_perag_resv(
 #define xrep_symlink			xrep_notsupported
 #define xrep_dir			xrep_notsupported
 #define xrep_xattr			xrep_notsupported
+#define xrep_rtsummary			xrep_notsupported
 
 #endif /* CONFIG_XFS_ONLINE_REPAIR */
 
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index 67a4bd9a1e89..b9a44cb89523 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -57,6 +57,7 @@ xchk_setup_rtsummary(
 	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = sc->mp;
+	unsigned long long	resblks = 0;
 	int			error;
 
 	/*
@@ -67,7 +68,17 @@ xchk_setup_rtsummary(
 	if (IS_ERR(sc->xfile))
 		return PTR_ERR(sc->xfile);
 
-	error = xchk_trans_alloc(sc, 0);
+	/*
+	 * If we're doing a repair, we reserve 2x the summary blocks: once for
+	 * the new summary contents and again for the bmbt blocks and the
+	 * remapping operation.
+	 */
+	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
+		resblks = XFS_B_TO_FSB(sc->mp, sc->mp->m_rsumsize) * 2;
+		if (resblks > UINT_MAX)
+			return -EOPNOTSUPP;
+	}
+	error = xchk_trans_alloc(sc, resblks);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c
new file mode 100644
index 000000000000..33cdb5a0bfd8
--- /dev/null
+++ b/fs/xfs/scrub/rtsummary_repair.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+
+/* Repair the realtime summary. */
+int
+xrep_rtsummary(
+	struct xfs_scrub	*sc)
+{
+	int			error;
+
+	/* Make sure any problems with the fork are fixed. */
+	error = xrep_metadata_inode_forks(sc);
+	if (error)
+		return error;
+
+	/* Make sure we have space allocated for the entire summary file. */
+	xfs_trans_ijoin(sc->tp, sc->ip, 0);
+	error = xrep_fallocate(sc, 0, XFS_B_TO_FSB(sc->mp, sc->mp->m_rsumsize));
+	if (error)
+		return error;
+
+	/* Copy the rtsummary file that we generated. */
+	return xrep_set_file_contents(sc, &xfs_rtbuf_ops,
+			XFS_BLFT_RTSUMMARY_BUF, sc->mp->m_rsumsize);
+}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index d82589798550..6aac6d410623 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -325,7 +325,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
 		.setup	= xchk_setup_rtsummary,
 		.scrub	= xchk_rtsummary,
 		.has	= xfs_sb_version_hasrealtime,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_rtsummary,
 	},
 	[XFS_SCRUB_TYPE_UQUOTA] = {	/* user quota */
 		.type	= ST_FS,
@@ -544,9 +544,8 @@ retry_op:
 		if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
 			sc.sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 
-		needs_fix = (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
-						XFS_SCRUB_OFLAG_XCORRUPT |
-						XFS_SCRUB_OFLAG_PREEN));
+		needs_fix = xfs_scrub_needs_repair(sc.sm);
+
 		/*
 		 * If userspace asked for a repair but it wasn't necessary,
 		 * report that back to userspace.