diff options
author | Darrick J. Wong <darrick.wong@oracle.com> | 2020-03-16 16:33:22 -0700 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2020-06-01 21:16:30 -0700 |
commit | e235a1f0be11cbfeec1e4b52210ad7d5a56eab96 (patch) | |
tree | 0f069e2545154df8fcd53c5a3393f30261967fc9 | |
parent | 2a2c839a292ffebe99ee45ffa028e3b9a80be9b1 (diff) |
xfs: implement online scrubbing of rtsummary info [scrub-rtsummary_2020-06-01]
Finish the realtime summary scrubber by adding the functions we need to
compute a fresh copy of the rtsummary info and compare it to the copy
on disk.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- | fs/xfs/scrub/common.h | 10 |
-rw-r--r-- | fs/xfs/scrub/rtbitmap.c | 4 |
-rw-r--r-- | fs/xfs/scrub/rtsummary.c | 315 |
-rw-r--r-- | fs/xfs/scrub/scrub.c | 8 |
-rw-r--r-- | fs/xfs/scrub/scrub.h | 1 |
-rw-r--r-- | fs/xfs/scrub/trace.h | 32 |
6 files changed, 342 insertions, 28 deletions
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 2e50d146105d..3324c88f6a08 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -96,10 +96,16 @@ int xchk_setup_symlink(struct xfs_scrub *sc, int xchk_setup_parent(struct xfs_scrub *sc, struct xfs_inode *ip); #ifdef CONFIG_XFS_RT -int xchk_setup_rt(struct xfs_scrub *sc, struct xfs_inode *ip); +int xchk_setup_rtbitmap(struct xfs_scrub *sc, struct xfs_inode *ip); +int xchk_setup_rtsummary(struct xfs_scrub *sc, struct xfs_inode *ip); #else static inline int -xchk_setup_rt(struct xfs_scrub *sc, struct xfs_inode *ip) +xchk_setup_rtbitmap(struct xfs_scrub *sc, struct xfs_inode *ip) +{ + return -ENOENT; +} +static inline int +xchk_setup_rtsummary(struct xfs_scrub *sc, struct xfs_inode *ip) { return -ENOENT; } diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 12c9c8fc6c70..fb58c25f35ac 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -19,13 +19,13 @@ /* Set us up with the realtime metadata locked. */ int -xchk_setup_rt( +xchk_setup_rtbitmap( struct xfs_scrub *sc, struct xfs_inode *ip) { int error; - error = xchk_setup_fs(sc, ip); + error = xchk_trans_alloc(sc, 0); if (error) return error; diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index 41c3634b8e2f..a70f6f0267bc 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -14,41 +14,312 @@ #include "xfs_trans.h" #include "xfs_rtalloc.h" #include "xfs_inode.h" +#include "xfs_bit.h" +#include "xfs_bmap.h" #include "scrub/scrub.h" #include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/xfile.h" -/* Scrub the realtime summary. */ +/* + * Realtime Summary + * ================ + * + * We check the realtime summary by scanning the realtime bitmap file to create + * a new summary file incore, and then we compare the computed version against + * the ondisk version. We use the 'xfile' functionality to store this + * (potentially large) amount of data in pageable memory. 
+ */ + +struct xchk_rtsum_compute { + /* How far have we iterated through the rt extents? */ + xfs_rtblock_t rt_extent_nr; + + /* How many free rt extents have we seen? */ + xfs_rtblock_t rt_free_nr; + + /* block and bit offset of our current position in the rtbitmap. */ + xfs_fileoff_t off; + unsigned int bit; + + /* block and bit offset of the start of the most recent free rtext. */ + xfs_fileoff_t start_off; + unsigned int start_bit; + + /* Are we accumulating a free rtext? */ + bool in_extent; +}; + +/* Set us up to check the rtsummary file. */ int -xchk_rtsummary( - struct xfs_scrub *sc) +xchk_setup_rtsummary( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - struct xfs_inode *rsumip = sc->mp->m_rsumip; - struct xfs_inode *old_ip = sc->ip; - uint old_ilock_flags = sc->ilock_flags; - int error = 0; + struct xfs_mount *mp = sc->mp; + int error; /* - * We ILOCK'd the rt bitmap ip in the setup routine, now lock the - * rt summary ip in compliance with the rt inode locking rules. - * - * Since we switch sc->ip to rsumip we have to save the old ilock - * flags so that we don't mix up the inode state that @sc tracks. + * Create an xfile to construct a new rtsummary file. The xfile allows + * us to avoid pinning kernel memory for this purpose. */ - sc->ip = rsumip; + sc->xfile = xfile_create("rtsummary", mp->m_rsumsize); + if (IS_ERR(sc->xfile)) + return PTR_ERR(sc->xfile); + + error = xchk_trans_alloc(sc, 0); + if (error) + return error; + + /* Allocate a memory buffer for the summary comparison. */ + sc->buf = kmem_alloc_large(sc->mp->m_sb.sb_blocksize, KM_MAYFAIL); + if (!sc->buf) + return -ENOMEM; + + /* + * Locking order requires us to take the rtbitmap first. We must be + * careful to unlock it ourselves when we are done with the rtbitmap + * file since the scrub infrastructure won't do that for us. + */ + xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); + + /* ...and then we can lock the rtsummary inode. 
*/ sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM; + sc->ip = sc->mp->m_rsumip; xfs_ilock(sc->ip, sc->ilock_flags); + return 0; +} + +/* Update the summary file to reflect the free extent that we've accumulated. */ +STATIC int +xchk_rtsum_record_free( + struct xfs_scrub *sc, + struct xchk_rtsum_compute *state) +{ + struct xfs_mount *mp = sc->mp; + loff_t pos; + uint64_t len; + unsigned int offs; + unsigned int log; + unsigned int bitsperblock = mp->m_sb.sb_blocksize * NBBY; + xfs_suminfo_t v = 0; + int error; + + /* Compute the relevant location in the rtsum file. */ + len = (state->off - state->start_off) * bitsperblock + + (state->bit - state->start_bit); + log = XFS_RTBLOCKLOG(len); + offs = XFS_SUMOFFS(mp, log, state->start_off); + + /* Read current rtsummary contents. */ + pos = sizeof(xfs_suminfo_t) * offs; + error = xfile_io(sc->xfile, XFILE_IO_READ, &pos, &v, + sizeof(xfs_suminfo_t)); + if (error) + return error; + + /* Bump the summary count... */ + v++; + pos = sizeof(xfs_suminfo_t) * offs; + trace_xchk_rtsum_record_free(mp, + state->start_off * bitsperblock + state->start_bit, + state->off * bitsperblock + state->bit - 1, + len, log, offs, v); + + /* ...and write it back. */ + error = xfile_io(sc->xfile, XFILE_IO_WRITE, &pos, &v, + sizeof(xfs_suminfo_t)); + if (error) + return error; + + state->in_extent = false; + return 0; +} + +static inline bool +xchk_rtsum_isset( + xfs_rtword_t *words, + unsigned int bit) +{ + return words[bit / (sizeof(*words) * NBBY)] & + (1ULL << (bit % (sizeof(*words) * NBBY))); +} + +/* Walk a single rtbitmap block looking for changes in the free status. 
*/ +STATIC int +xchk_rtsum_process_bmblock( + struct xfs_scrub *sc, + xfs_fileoff_t block_off, + struct xchk_rtsum_compute *state) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_buf *bp; + xfs_rtword_t *words; + unsigned int bitsperblock = mp->m_sb.sb_blocksize * NBBY; + int error = 0; + + if (xchk_should_terminate(sc, &error)) + return error; + + error = xfs_rtbuf_get(mp, sc->tp, block_off, 0, &bp); + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, block_off, + &error)) + return error; + + state->off = block_off; + words = (xfs_rtword_t *)bp->b_addr; + for (state->bit = 0; + state->bit < bitsperblock && + state->rt_extent_nr < mp->m_sb.sb_rextents; + state->bit++, state->rt_extent_nr++) { + if (xchk_rtsum_isset(words, state->bit)) { + state->rt_free_nr++; + if (!state->in_extent) { + state->start_off = block_off; + state->start_bit = state->bit; + state->in_extent = true; + } + } else if (state->in_extent) { + error = xchk_rtsum_record_free(sc, state); + if (error) + goto out_relse; + } + } + +out_relse: + xfs_trans_brelse(sc->tp, bp); + return error; +} + +/* + * Compute the realtime summary from the realtime bitmap. This is a kernel + * port of the defunct process_rtbitmap function in xfs_repair. + */ +STATIC int +xchk_rtsum_compute( + struct xfs_scrub *sc) +{ + struct xchk_rtsum_compute state = { 0 }; + struct xfs_mount *mp = sc->mp; + unsigned long long rtbmp_bytes; + xfs_fileoff_t off = 0; + xfs_fileoff_t end_off; + int error; + + rtbmp_bytes = howmany_64(mp->m_sb.sb_rextents, NBBY); + end_off = howmany_64(rtbmp_bytes, mp->m_sb.sb_blocksize); + + /* If the bitmap size doesn't match the computed size, bail. 
*/ + if (roundup_64(rtbmp_bytes, mp->m_sb.sb_blocksize) != + mp->m_rbmip->i_d.di_size) + return -EFSCORRUPTED; + + for (off = 0; off < end_off; off++) { + error = xchk_rtsum_process_bmblock(sc, off, &state); + if (error) + return error; + if (state.rt_extent_nr == mp->m_sb.sb_rextents) + break; + } + if (state.in_extent) { + error = xchk_rtsum_record_free(sc, &state); + if (error) + return error; + } + + return 0; +} + +/* Compare the rtsummary file against the one we computed. */ +STATIC int +xchk_rtsum_compare( + struct xfs_scrub *sc) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_buf *bp; + struct xfs_bmbt_irec map; + xfs_rtblock_t off; + loff_t pos; + int nmap; + int error = 0; + + for (off = 0, pos = 0; + pos < mp->m_rsumsize; + pos += mp->m_sb.sb_blocksize, off++) { + loff_t ppos = pos; + size_t count; + + if (xchk_should_terminate(sc, &error) || + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + break; + + /* Make sure we have a written extent. */ + nmap = 1; + error = xfs_bmapi_read(mp->m_rsumip, off, 1, &map, &nmap, + XFS_DATA_FORK); + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error)) + break; + + if (nmap != 1 || !xfs_bmap_is_real_extent(&map)) { + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off); + break; + } + + /* Read a block's worth of ondisk rtsummary file. */ + error = xfs_rtbuf_get(mp, sc->tp, off, 1, &bp); + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error)) + break; + + /* Read a block's worth of computed rtsummary file. */ + count = min_t(loff_t, mp->m_rsumsize - pos, + mp->m_sb.sb_blocksize); + error = xfile_io(sc->xfile, XFILE_IO_READ, &ppos, sc->buf, + count); + if (error) { + xfs_trans_brelse(sc->tp, bp); + break; + } + + if (memcmp(bp->b_addr, sc->buf, count) != 0) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off); + + xfs_trans_brelse(sc->tp, bp); + } + + return error; +} + +/* Scrub the realtime summary. 
*/ +int +xchk_rtsummary( + struct xfs_scrub *sc) +{ + struct xfs_mount *mp = sc->mp; + int error = 0; + /* Invoke the fork scrubber. */ error = xchk_metadata_inode_forks(sc); if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) - goto out; - - /* XXX: implement this some day */ - xchk_set_incomplete(sc); -out: - /* Switch back to the rtbitmap inode and lock flags. */ - xfs_iunlock(sc->ip, sc->ilock_flags); - sc->ilock_flags = old_ilock_flags; - sc->ip = old_ip; + goto out_rbm; + + /* Construct the new summary file from the rtbitmap. */ + error = xchk_rtsum_compute(sc); + if (error == -EFSCORRUPTED) { + /* + * EFSCORRUPTED means the rtbitmap is corrupt, which is an xref + * error since we're checking the summary file. + */ + xchk_ino_xref_set_corrupt(sc, mp->m_rbmip->i_ino); + error = 0; + goto out_rbm; + } + + /* Does the computed summary file match the actual rtsummary file? */ + error = xchk_rtsum_compare(sc); + +out_rbm: + /* Unlock the rtbitmap since we're done with it. */ + xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); return error; } diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index bcc3bf8ea813..6aaae5d39072 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -177,6 +177,10 @@ xchk_teardown( mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock); sc->flags &= ~XCHK_HAS_QUOTAOFFLOCK; } + if (sc->xfile) { + fput(sc->xfile); + sc->xfile = NULL; + } if (sc->buf) { kmem_free(sc->buf); sc->buf = NULL; @@ -306,14 +310,14 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { }, [XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */ .type = ST_FS, - .setup = xchk_setup_rt, + .setup = xchk_setup_rtbitmap, .scrub = xchk_rtbitmap, .has = xfs_sb_version_hasrealtime, .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ .type = ST_FS, - .setup = xchk_setup_rt, + .setup = xchk_setup_rtsummary, .scrub = xchk_rtsummary, .has = xfs_sb_version_hasrealtime, .repair = xrep_notsupported, diff --git 
a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index ad1ceb44a628..6577ad22151d 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -61,6 +61,7 @@ struct xfs_scrub { struct xfs_trans *tp; struct xfs_inode *ip; void *buf; + struct file *xfile; uint ilock_flags; /* See the XCHK/XREP state flags below. */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 3449d01e459f..2965c30ff319 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -651,6 +651,38 @@ TRACE_EVENT(xchk_fscounters_within_range, __entry->old_value) ) +TRACE_EVENT(xchk_rtsum_record_free, + TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t start, xfs_rtblock_t end, + uint64_t len, unsigned int log, loff_t pos, xfs_suminfo_t v), + TP_ARGS(mp, start, end, len, log, pos, v), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_rtblock_t, start) + __field(xfs_rtblock_t, end) + __field(unsigned long long, len) + __field(unsigned int, log) + __field(loff_t, pos) + __field(xfs_suminfo_t, v) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->start = start; + __entry->end = end; + __entry->len = len; + __entry->log = log; + __entry->pos = pos; + __entry->v = v; + ), + TP_printk("dev %d:%d start %llu end %llu len %llu log %u pos %lld v %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->start, + __entry->end, + __entry->len, + __entry->log, + __entry->pos, + __entry->v) +) + /* repair tracepoints */ #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) |