diff options
author | Darrick J. Wong <djwong@kernel.org> | 2021-09-01 11:25:16 -0700 |
---|---|---|
committer | Darrick J. Wong <djwong@kernel.org> | 2021-12-15 17:29:28 -0800 |
commit | 83ab5f54660561bbd5bac55eb5e8241dc00b99f3 (patch) | |
tree | 83651c6e37725e249335518718258f9a00585c19 /fs/xfs/scrub | |
parent | 7b93e94de491367d09e6647d3223aae40c55fe3a (diff) |
xfs: teach scrub to check file nlinks
Copy-pasta the online quotacheck code to check inode link counts too.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs/xfs/scrub')
-rw-r--r-- | fs/xfs/scrub/common.h | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/health.c | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/nlinks.c | 744 | ||||
-rw-r--r-- | fs/xfs/scrub/nlinks.h | 56 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.c | 6 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.h | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.c | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.h | 132 |
8 files changed, 942 insertions, 0 deletions
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 04f7e1731458..f2e1cf719c06 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -132,6 +132,7 @@ xchk_setup_quotacheck(struct xfs_scrub *sc) } #endif int xchk_setup_fscounters(struct xfs_scrub *sc); +int xchk_setup_nlinks(struct xfs_scrub *sc); void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa); int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno, diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 88a3969a9161..49a806400dac 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -109,6 +109,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = { [XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK }, [XFS_SCRUB_TYPE_RTRMAPBT] = { XHG_RT, XFS_SICK_RT_RMAPBT }, [XFS_SCRUB_TYPE_RTREFCBT] = { XHG_RT, XFS_SICK_RT_REFCNTBT }, + [XFS_SCRUB_TYPE_NLINKS] = { XHG_FS, XFS_SICK_FS_NLINKS }, }; /* Return the health status mask for this scrub type. */ diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c new file mode 100644 index 000000000000..aa5a3237a34f --- /dev/null +++ b/fs/xfs/scrub/nlinks.c @@ -0,0 +1,744 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2021 Oracle. All Rights Reserved. + * Author: Darrick J. 
Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_iwalk.h" +#include "xfs_ialloc.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_ag.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/repair.h" +#include "scrub/xfarray.h" +#include "scrub/iscan.h" +#include "scrub/nlinks.h" +#include "scrub/trace.h" + +/* + * Live Inode Link Count Checking + * ============================== + * + * Inode link counts are "summary" metadata, in the sense that they are + * computed as the number of directory entries referencing each file on the + * filesystem. Therefore, we compute the correct link counts by creating a + * shadow link count structure and walking every inode. + * + * Because we are scanning a live filesystem, it's possible that another thread + * will try to update the link counts for an inode that we've already scanned. + * This will cause our counts to be incorrect. Therefore, we hook all inode + * link count updates when the change is made to the incore inode. By + * shadowing transaction updates in this manner, live nlink check can ensure by + * locking the inode and the shadow structure that its own copies are not out + * of date. + * + * Note that we use srcu notifier hooks to minimize the overhead when live + * nlinks is /not/ running. Locking order for nlink observations is inode + * ILOCK -> iscan_lock/xchk_nlink_ctrs lock. + */ + +/* Set us up to scrub inode link counts. */ +int +xchk_setup_nlinks( + struct xfs_scrub *sc) +{ + sc->buf = kmem_zalloc(sizeof(struct xchk_nlink_ctrs), + KM_NOFS | KM_MAYFAIL); + if (!sc->buf) + return -ENOMEM; + + return xchk_setup_fs(sc); +} + +/* Update incore link count information. Caller must hold the nlinks lock. 
*/ +STATIC int +xchk_nlinks_update_incore( + struct xchk_nlink_ctrs *xnc, + struct xfs_inode *dp, + bool parent_to_child, + xfs_ino_t ino, + int delta) +{ + struct xchk_nlink nl; + int error; + + if (!xnc->nlinks) + return 0; + + error = xfarray_load_sparse(xnc->nlinks, ino, &nl); + if (error) + return error; + + if (parent_to_child) + nl.parent += delta; + else + nl.child += delta; + + error = xfarray_store(xnc->nlinks, ino, &nl); + if (error == -EFBIG) { + /* + * EFBIG means we tried to store data at too high a byte offset + * in the sparse array. IOWs, we cannot complete the check and + * must notify userspace that the check was incomplete. + */ + xchk_set_incomplete(xnc->sc); + error = -ECANCELED; + } + return error; +} + +/* Read the observed link count for comparison with the actual inode. */ +STATIC int +xchk_nlinks_comparison_read( + struct xchk_nlink_ctrs *xnc, + xfs_ino_t ino, + struct xchk_nlink *obs) +{ + struct xchk_nlink nl; + int error; + + error = xfarray_load_sparse(xnc->nlinks, ino, &nl); + if (error) + return error; + + nl.flags |= XCHK_NLINK_COMPARE_SCANNED; + + error = xfarray_store(xnc->nlinks, ino, &nl); + if (error == -EFBIG) { + /* + * EFBIG means we tried to store data at too high a byte offset + * in the sparse array. IOWs, we cannot complete the check and + * must notify userspace that the check was incomplete. + */ + xchk_set_incomplete(xnc->sc); + return -ECANCELED; + } + if (error) + return error; + + obs->parent = nl.parent; + obs->child = nl.child; + return 0; +} + +/* + * Apply a link count change from the regular filesystem into our shadow link + * count structure. 
+ */ +STATIC int +xchk_nlinks_live_update( + struct notifier_block *nb, + unsigned long arg, + void *data) +{ + struct xfs_nlink_delta_params *p = data; + struct xchk_nlink_ctrs *xnc; + bool parent_to_child; + int error; + + parent_to_child = (arg == XFS_PARENT_NLINK_DELTA); + xnc = container_of(nb, struct xchk_nlink_ctrs, nlink_delta_hook); + + if (!xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) + return NOTIFY_DONE; + + trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, parent_to_child, + p->ino, p->delta); + + mutex_lock(&xnc->lock); + error = xchk_nlinks_update_incore(xnc, p->dp, parent_to_child, p->ino, + p->delta); + mutex_unlock(&xnc->lock); + if (error) + xchk_iscan_abort(&xnc->collect_iscan); + + return NOTIFY_DONE; +} + +struct xchk_walk_dir { + struct dir_context dir_iter; + struct xchk_nlink_ctrs *xnc; + struct xfs_inode *dp; +}; + +/* Bump the observed link count for the inode referenced by this entry. */ +STATIC int +xchk_nlinks_walk_dir( + struct dir_context *dir_iter, + const char *name, + int namelen, + loff_t pos, + u64 ino, + unsigned type) +{ + struct xchk_walk_dir *xwd; + struct xchk_nlink_ctrs *xnc; + bool parent_to_child = true; + int error = -ECANCELED; + + xwd = container_of(dir_iter, struct xchk_walk_dir, dir_iter); + xnc = xwd->xnc; + + if (namelen == 0) { + /* Shouldn't be any zero-length dirents... */ + xchk_set_incomplete(xnc->sc); + return -ECANCELED; + } else if (namelen == 1 && name[0] == '.') { + /* + * The dot entry has to point to the directory, and we account + * it as a "child" pointing to its parent. + */ + if (ino != xwd->dp->i_ino) { + xchk_set_incomplete(xnc->sc); + return -ECANCELED; + } + parent_to_child = false; + } else if (namelen == 2 && name[0] == '.' && name[1] == '.') { + /* dotdot means child pointing to parent */ + parent_to_child = false; + } + + /* Update the shadow link counts if we haven't already failed. 
*/ + + if (xchk_iscan_aborted(&xnc->collect_iscan)) + goto out_incomplete; + + trace_xchk_nlinks_walk_dir(xnc->sc->mp, xwd->dp, parent_to_child, ino, + name, namelen); + mutex_lock(&xnc->lock); + error = xchk_nlinks_update_incore(xnc, xwd->dp, parent_to_child, ino, + 1); + mutex_unlock(&xnc->lock); + if (error) + goto out_abort; + + return 0; + +out_abort: + xchk_iscan_abort(&xnc->collect_iscan); +out_incomplete: + xchk_set_incomplete(xnc->sc); + return error; +} + +/* Bump the observed link counts of every entry in this directory. */ +STATIC int +xchk_nlinks_dir( + struct xchk_nlink_ctrs *xnc, + struct xfs_inode *dp) +{ + struct xfs_scrub *sc = xnc->sc; + struct xchk_walk_dir xwd = { + .dir_iter.actor = xchk_nlinks_walk_dir, + .dir_iter.pos = 0, + .xnc = xnc, + .dp = dp, + }; + loff_t oldpos; + size_t bufsize; + unsigned int lock_mode; + int error = 0; + + /* Lock out the VFS from changing this directory while we walk it. */ + xfs_ilock(dp, XFS_IOLOCK_SHARED); + + /* + * The dotdot entry of an unlinked directory still points to the last + * parent, but the parent no longer links to this directory. Skip the + * directory to avoid overcounting. + */ + if (VFS_I(dp)->i_nlink == 0) + goto out; + + /* + * If there are any blocks, read-ahead block 0 as we're almost certain + * to have the next operation be a read there. This is how we + * guarantee that the directory's extent map has been loaded, if there + * is one. + */ + lock_mode = xfs_ilock_data_map_shared(dp); + if (dp->i_df.if_nextents > 0) + error = xfs_dir3_data_readahead(dp, 0, 0); + xfs_iunlock(dp, lock_mode); + if (error) + goto out; + + /* + * Bump link counts for every dirent we see. Userspace usually asks + * for a 32k buffer, so we will too. 
+ */ + bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, dp->i_disk_size); + do { + oldpos = xwd.dir_iter.pos; + error = xfs_readdir(sc->tp, dp, &xwd.dir_iter, bufsize); + } while (!error && oldpos < xwd.dir_iter.pos); + + xchk_iscan_mark_visited(&xnc->collect_iscan, dp); +out: + xfs_iunlock(dp, XFS_IOLOCK_SHARED); + return error; +} + +/* If this looks like a valid pointer, count it. */ +static inline int +xchk_nlinks_metafile( + struct xchk_nlink_ctrs *xnc, + xfs_ino_t ino) +{ + if (!xfs_verify_ino(xnc->sc->mp, ino)) + return 0; + + trace_xchk_nlinks_metafile(xnc->sc->mp, ino); + return xchk_nlinks_update_incore(xnc, NULL, true, ino, 1); +} + +/* Bump the link counts of metadata files rooted in the superblock. */ +STATIC int +xchk_nlinks_metafiles( + struct xchk_nlink_ctrs *xnc) +{ + struct xfs_mount *mp = xnc->sc->mp; + int error = -ECANCELED; + + + if (xchk_iscan_aborted(&xnc->collect_iscan)) + goto out_incomplete; + + mutex_lock(&xnc->lock); + error = xchk_nlinks_metafile(xnc, mp->m_sb.sb_rbmino); + if (error) + goto out_abort; + + error = xchk_nlinks_metafile(xnc, mp->m_sb.sb_rsumino); + if (error) + goto out_abort; + + error = xchk_nlinks_metafile(xnc, mp->m_sb.sb_uquotino); + if (error) + goto out_abort; + + error = xchk_nlinks_metafile(xnc, mp->m_sb.sb_gquotino); + if (error) + goto out_abort; + + error = xchk_nlinks_metafile(xnc, mp->m_sb.sb_pquotino); + if (error) + goto out_abort; + mutex_unlock(&xnc->lock); + + return 0; + +out_abort: + mutex_unlock(&xnc->lock); + xchk_iscan_abort(&xnc->collect_iscan); +out_incomplete: + xchk_set_incomplete(xnc->sc); + return error; +} + +/* Advance the collection scan cursor for this file. */ +static inline int +xchk_nlinks_file( + struct xchk_nlink_ctrs *xnc, + struct xfs_inode *ip) +{ + xfs_ilock(ip, XFS_IOLOCK_SHARED); + xchk_iscan_mark_visited(&xnc->collect_iscan, ip); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return 0; +} + +/* Walk all directories and count inode links. 
*/ +STATIC int +xchk_nlinks_collect( + struct xchk_nlink_ctrs *xnc) +{ + struct xfs_scrub *sc = xnc->sc; + int error; + + /* Count the rt and quota files if they're rooted in the superblock. */ + if (!xfs_has_metadir(sc->mp)) { + error = xchk_nlinks_metafiles(xnc); + if (error) + return error; + } + + /* + * Set up for a potentially lengthy filesystem scan by reducing our + * transaction resource usage for the duration. Specifically: + * + * Cancel the transaction to release the log grant space while we scan + * the filesystem. + * + * Create a new empty transaction to eliminate the possibility of the + * inode scan deadlocking on cyclical metadata. + * + * We pass the empty transaction to the file scanning function to avoid + * repeatedly cycling empty transactions. This can be done even though + * we take the IOLOCK to quiesce the file because empty transactions + * do not take sb_internal. + */ + xchk_trans_cancel(sc); + error = xchk_trans_alloc_empty(sc); + if (error) + return error; + + while ((error = xchk_iscan_advance(sc, &xnc->collect_iscan)) == 1) { + struct xfs_inode *ip; + + error = xchk_iscan_iget(sc, &xnc->collect_iscan, &ip); + if (error == -EAGAIN) + continue; + if (error) + break; + + if (S_ISDIR(VFS_I(ip)->i_mode)) + error = xchk_nlinks_dir(xnc, ip); + else + error = xchk_nlinks_file(xnc, ip); + xchk_irele(sc, ip); + if (error) + break; + + if (xchk_should_terminate(sc, &error)) + break; + } + + if (error == -ECANCELED) + xchk_set_incomplete(sc); + if (error) + return error; + + /* + * Switch out for a real transaction in preparation for building a new + * tree. + */ + xchk_trans_cancel(sc); + return xchk_setup_fs(sc); +} + +/* Check our link count against an inode. 
*/ +STATIC int +xchk_nlinks_compare_inode( + struct xchk_nlink_ctrs *xnc, + struct xfs_inode *ip) +{ + struct xchk_nlink obs; + struct xfs_scrub *sc = xnc->sc; + uint64_t total_links; + unsigned int actual_nlink; + int error; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + mutex_lock(&xnc->lock); + + if (xchk_iscan_aborted(&xnc->collect_iscan)) { + xchk_set_incomplete(xnc->sc); + error = -ECANCELED; + goto out_scanlock; + } + + error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs); + if (error) + goto out_scanlock; + total_links = xchk_nlink_total(&obs); + actual_nlink = VFS_I(ip)->i_nlink; + + trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs); + + /* We found more than the maximum possible link count. */ + if (total_links > U32_MAX) + xchk_ino_set_corrupt(sc, ip->i_ino); + + /* Link counts should match. */ + if (total_links != actual_nlink) + xchk_ino_set_corrupt(sc, ip->i_ino); + + /* + * Directories with nonzero link count must have at least one child + * (dot entry). The collection phase ignores directories with zero + * link count, so we ignore them here too. + */ + if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0 && obs.child < 1) + xchk_ino_set_corrupt(sc, ip->i_ino); + + /* Non-directories should not have children */ + if (!S_ISDIR(VFS_I(ip)->i_mode) && obs.child != 0) + xchk_ino_set_corrupt(sc, ip->i_ino); + + if (ip == sc->mp->m_metadirip || ip == sc->mp->m_rootip) { + /* Nothing should point to the directory tree roots. */ + if (obs.parent != 0) + xchk_ino_set_corrupt(sc, ip->i_ino); + + /* + * Directory tree roots should have at least two "child" + * references to cover dot and dotdot. + */ + if (obs.child < 2) + xchk_ino_set_corrupt(sc, ip->i_ino); + } else if (obs.parent == 0) { + /* Non-root linked files should have a parent. 
*/ + if (actual_nlink != 0) + xchk_ino_set_corrupt(sc, ip->i_ino); + } + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + error = -EFSCORRUPTED; + +out_scanlock: + mutex_unlock(&xnc->lock); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + return error; +} + +/* + * Check our link count against an inode that wasn't checked previously. This + * is intended to catch directories with dangling links, though we could be + * racing with inode allocation in other threads. + */ +STATIC int +xchk_nlinks_compare_inum( + struct xchk_nlink_ctrs *xnc, + xfs_ino_t ino) +{ + struct xchk_nlink obs; + struct xfs_mount *mp = xnc->sc->mp; + struct xfs_trans *tp = xnc->sc->tp; + struct xfs_buf *agi_bp; + struct xfs_inode *ip; + int error; + + /* + * Lock the AGI to the transaction just in case the lookup fails and we + * need something to prevent inode allocation while we reconfirm the + * observed nlink value. + */ + error = xfs_ialloc_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ino), &agi_bp); + if (error) + return error; + + error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, 0, &ip); + if (error == 0) { + /* Actually got an inode, so use the inode compare. */ + xfs_trans_brelse(tp, agi_bp); + error = xchk_nlinks_compare_inode(xnc, ip); + xchk_irele(xnc->sc, ip); + return error; + } + if (error == -ENOENT || error == -EINVAL) { + /* No inode was found; check for zero link count below. */ + error = 0; + } + if (error) + goto out_agi; + + if (xchk_iscan_aborted(&xnc->collect_iscan)) { + xchk_set_incomplete(xnc->sc); + error = -ECANCELED; + goto out_agi; + } + + mutex_lock(&xnc->lock); + error = xchk_nlinks_comparison_read(xnc, ino, &obs); + if (error) + goto out_scanlock; + + trace_xchk_nlinks_check_zero(mp, ino, &obs); + + /* + * If we can't grab the inode, the link count had better be zero. We + * still hold the AGI to prevent inode allocation/freeing. 
+ */ + if (xchk_nlink_total(&obs) != 0) { + xchk_ino_set_corrupt(xnc->sc, ino); + error = -ECANCELED; + } + +out_scanlock: + mutex_unlock(&xnc->lock); +out_agi: + xfs_trans_brelse(tp, agi_bp); + return error; +} + +/* Compare the link counts we observed against the live information. */ +STATIC int +xchk_nlinks_compare( + struct xchk_nlink_ctrs *xnc) +{ + struct xchk_nlink nl; + struct xfs_scrub *sc = xnc->sc; + uint64_t nr = 0; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return 0; + + /* + * Create a new empty transaction so that we can advance the iscan + * cursor without deadlocking if the inobt has a cycle and push on the + * inactivation workqueue. + */ + xchk_trans_cancel(sc); + error = xchk_trans_alloc_empty(sc); + if (error) + return error; + + /* + * Use the inobt to walk all allocated inodes to compare the link + * counts. If we can't grab the inode, we'll try again in the second + * step. + */ + xchk_iscan_start(&xnc->compare_iscan); + while ((error = xchk_iscan_advance(sc, &xnc->compare_iscan)) == 1) { + struct xfs_inode *ip; + + error = xchk_iscan_iget(sc, &xnc->compare_iscan, &ip); + if (error == -ECANCELED) + continue; + if (error) + break; + + error = xchk_nlinks_compare_inode(xnc, ip); + xchk_iscan_mark_visited(&xnc->compare_iscan, ip); + xchk_irele(sc, ip); + if (error) + break; + + if (xchk_should_terminate(sc, &error)) + break; + } + xchk_iscan_finish(&xnc->compare_iscan); + if (error) + return error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return 0; + + /* + * Walk all the non-null nlink observations that weren't checked in the + * previous step. 
+ */ + mutex_lock(&xnc->lock); + while ((error = xfarray_iter(xnc->nlinks, &nr, &nl)) == 1) { + xfs_ino_t ino = nr - 1; + + if (nl.flags & XCHK_NLINK_COMPARE_SCANNED) + continue; + + mutex_unlock(&xnc->lock); + + error = xchk_nlinks_compare_inum(xnc, ino); + if (error) + return error; + + if (xchk_should_terminate(xnc->sc, &error)) + return error; + + mutex_lock(&xnc->lock); + } + mutex_unlock(&xnc->lock); + + return error; +} + +/* Tear down everything associated with a nlinks check. */ +static void +xchk_nlinks_teardown_scan( + struct xchk_nlink_ctrs *xnc) +{ + /* Discourage any hook functions that might be running. */ + xchk_iscan_abort(&xnc->collect_iscan); + + xfs_hook_del(&xnc->sc->mp->m_nlink_delta_hooks, &xnc->nlink_delta_hook); + + xfarray_destroy(xnc->nlinks); + xnc->nlinks = NULL; + + xchk_iscan_finish(&xnc->collect_iscan); + mutex_destroy(&xnc->lock); + xnc->sc = NULL; +} + +/* + * Scan all inodes in the entire filesystem to generate link count data. If + * the scan is successful, the counts will be left alive for a repair. If any + * error occurs, we'll tear everything down. + */ +STATIC int +xchk_nlinks_setup_scan( + struct xfs_scrub *sc, + struct xchk_nlink_ctrs *xnc) +{ + int error; + + ASSERT(xnc->sc == NULL); + xnc->sc = sc; + + mutex_init(&xnc->lock); + xnc->collect_iscan.iget_tries = 20; + xnc->collect_iscan.iget_retry_delay = HZ / 10; + xchk_iscan_start(&xnc->collect_iscan); + + error = xfarray_create(sc->mp, "file link counts", + sizeof(struct xchk_nlink), &xnc->nlinks); + if (error) + goto out_teardown; + + /* + * Hook into the bumplink/droplink code. The hook only triggers for + * inodes that were already scanned, and the scanner thread takes each + * inode's ILOCK, which means that any in-progress inode updates will + * finish before we can scan the inode. 
+ */ + error = xfs_hook_add(&sc->mp->m_nlink_delta_hooks, + &xnc->nlink_delta_hook, xchk_nlinks_live_update); + if (error) + goto out_teardown; + + /* Use deferred cleanup to pass the inode link count data to repair. */ + sc->buf_cleanup = (void (*)(void *))xchk_nlinks_teardown_scan; + return 0; + +out_teardown: + xchk_nlinks_teardown_scan(xnc); + return error; +} + +/* Scrub the link count of all inodes on the filesystem. */ +int +xchk_nlinks( + struct xfs_scrub *sc) +{ + struct xchk_nlink_ctrs *xnc = sc->buf; + int error = 0; + + /* Set ourselves up to check link counts on the live filesystem. */ + error = xchk_nlinks_setup_scan(sc, xnc); + if (error) + return error; + + /* Walk all inodes, picking up link count information. */ + error = xchk_nlinks_collect(xnc); + if (!xchk_xref_process_error(sc, 0, 0, &error)) + return error; + + /* Compare link counts. */ + error = xchk_nlinks_compare(xnc); + if (!xchk_xref_process_error(sc, 0, 0, &error)) + return error; + + return 0; +} diff --git a/fs/xfs/scrub/nlinks.h b/fs/xfs/scrub/nlinks.h new file mode 100644 index 000000000000..0ece2ab5dd38 --- /dev/null +++ b/fs/xfs/scrub/nlinks.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2021 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_SCRUB_NLINKS_H__ +#define __XFS_SCRUB_NLINKS_H__ + +/* Live link count control structure. */ +struct xchk_nlink_ctrs { + struct xfs_scrub *sc; + + /* Shadow link count data and its mutex. */ + struct xfarray *nlinks; + struct mutex lock; + + /* + * The collection step uses a separate iscan context from the compare + * step because the collection iscan coordinates live updates to the + * observation data while this scanner is running. The compare iscan + * is secondary and can be reinitialized as needed. 
+ */ + struct xchk_iscan collect_iscan; + struct xchk_iscan compare_iscan; + + /* + * Hook into bumplink/droplink so that we can receive live updates + * from other writer threads. + */ + struct notifier_block nlink_delta_hook; +}; + +struct xchk_nlink { + /* Links from a parent directory to this inode. */ + xfs_nlink_t parent; + + /* Links from children of this inode (e.g. dot and dotdot). */ + xfs_nlink_t child; + + /* Record state flags */ + unsigned int flags; +}; + +/* This data item was seen by the check-time compare function. */ +#define XCHK_NLINK_COMPARE_SCANNED (1U << 0) + +/* Compute total link count, using large enough variables to detect overflow. */ +static inline uint64_t +xchk_nlink_total(const struct xchk_nlink *live) +{ + uint64_t ret = live->parent; + + return ret + live->child; +} + +#endif /* __XFS_SCRUB_NLINKS_H__ */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index e60f2e71dac0..efd44b31997d 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -393,6 +393,12 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .has = xfs_has_rtreflink, .repair = xrep_rtrefcountbt, }, + [XFS_SCRUB_TYPE_NLINKS] = { /* inode link counts */ + .type = ST_FS, + .setup = xchk_setup_nlinks, + .scrub = xchk_nlinks, + .repair = xrep_notsupported, + }, }; /* This isn't a stable feature, warn once per day. 
*/ diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index f0a5acf7df48..d9ad8049c84d 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -201,6 +201,7 @@ xchk_quotacheck(struct xfs_scrub *sc) } #endif int xchk_fscounters(struct xfs_scrub *sc); +int xchk_nlinks(struct xfs_scrub *sc); /* cross-referencing helpers */ void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno, diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 2deee7fabb96..988b54eb2ebd 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -19,6 +19,7 @@ #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/iscan.h" +#include "scrub/nlinks.h" /* Figure out which block the btree cursor was pointing to. */ static inline xfs_fsblock_t diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 10c43e03e133..0ee0c7fd7400 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -21,6 +21,7 @@ struct xfs_scrub; struct xfile; struct xfarray; struct xchk_iscan; +struct xchk_nlink; /* * ftrace's __print_symbolic requires that all enum values be wrapped in the @@ -97,6 +98,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER); { XFS_SCRUB_TYPE_HEALTHY, "healthy" }, \ { XFS_SCRUB_TYPE_RTRMAPBT, "rtrmapbt" }, \ { XFS_SCRUB_TYPE_RTREFCBT, "rtrefcountbt" }, \ + { XFS_SCRUB_TYPE_NLINKS, "nlinks" }, \ { XFS_SCRUB_TYPE_BARRIER, "barrier" } #define XFS_SCRUB_FLAG_STRINGS \ @@ -990,6 +992,136 @@ TRACE_EVENT(xchk_iscan_iget, __entry->cursor, __entry->visited, __entry->error) ); +TRACE_EVENT(xchk_nlinks_walk_dir, + TP_PROTO(struct xfs_mount *mp, struct xfs_inode *dp, + bool parent_to_child, xfs_ino_t ino, const char *name, + unsigned int namelen), + TP_ARGS(mp, dp, parent_to_child, ino, name, namelen), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dir) + __field(bool, parent) + __field(xfs_ino_t, ino) + __field(unsigned int, namelen) + __dynamic_array(char, name, namelen) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + 
__entry->dir = dp->i_ino; + __entry->parent = parent_to_child; + __entry->ino = ino; + __entry->namelen = namelen; + memcpy(__get_str(name), name, namelen); + ), + TP_printk("dev %d:%d dir 0x%llx parent_to_child? %d ino 0x%llx name '%.*s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dir, + __entry->parent, + __entry->ino, + __entry->namelen, + __get_str(name)) +); + +TRACE_EVENT(xchk_nlinks_metafile, + TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino), + TP_ARGS(mp, ino), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->ino = ino; + ), + TP_printk("dev %d:%d ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino) +); + +TRACE_EVENT(xchk_nlinks_live_update, + TP_PROTO(struct xfs_mount *mp, struct xfs_inode *dp, + bool parent_to_child, xfs_ino_t ino, int delta), + TP_ARGS(mp, dp, parent_to_child, ino, delta), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dir) + __field(bool, parent) + __field(xfs_ino_t, ino) + __field(int, delta) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->dir = dp ? dp->i_ino : NULLFSINO; + __entry->parent = parent_to_child; + __entry->ino = ino; + __entry->delta = delta; + ), + TP_printk("dev %d:%d dir 0x%llx parent_to_child? 
%d ino 0x%llx nlink_delta %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dir, + __entry->parent, + __entry->ino, + __entry->delta) +); + +TRACE_EVENT(xchk_nlinks_check_zero, + TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, + const struct xchk_nlink *live), + TP_ARGS(mp, ino, live), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_nlink_t, parent) + __field(xfs_nlink_t, child) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->ino = ino; + __entry->parent = live->parent; + __entry->child = live->child; + ), + TP_printk("dev %d:%d ino 0x%llx parent_links %u child_links %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->parent, + __entry->child) +); + +DECLARE_EVENT_CLASS(xchk_nlink_diff_class, + TP_PROTO(struct xfs_mount *mp, struct xfs_inode *ip, + const struct xchk_nlink *live), + TP_ARGS(mp, ip, live), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(umode_t, mode) + __field(xfs_nlink_t, nlink) + __field(xfs_nlink_t, parent) + __field(xfs_nlink_t, child) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->mode = VFS_I(ip)->i_mode; + __entry->nlink = VFS_I(ip)->i_nlink; + __entry->parent = live->parent; + __entry->child = live->child; + ), + TP_printk("dev %d:%d ino 0x%llx dir? %d nlink %u parent_links %u child_links %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + !!S_ISDIR(__entry->mode), + __entry->nlink, + __entry->parent, + __entry->child) +); +#define DEFINE_SCRUB_NLINK_DIFF_EVENT(name) \ +DEFINE_EVENT(xchk_nlink_diff_class, name, \ + TP_PROTO(struct xfs_mount *mp, struct xfs_inode *ip, \ + const struct xchk_nlink *live), \ + TP_ARGS(mp, ip, live)) +DEFINE_SCRUB_NLINK_DIFF_EVENT(xchk_nlinks_compare_inode); + /* repair tracepoints */ #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) |