diff options
author | Darrick J. Wong <djwong@kernel.org> | 2021-09-01 11:25:16 -0700 |
---|---|---|
committer | Darrick J. Wong <djwong@kernel.org> | 2021-09-17 18:55:29 -0700 |
commit | e2ae66671e52f06e6dba1c0b4339897759c5e590 (patch) | |
tree | 7cabcdef5a81ab02f8edf5fcd4ede093c5f94404 | |
parent | a6fcc8a01b50eeb4339692233bd4ab83ab7ce7f6 (diff) |
xfs: teach scrub to check file nlinks
Copy and adapt the online quotacheck code to check inode link counts too.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- | fs/xfs/Makefile | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_fs.h | 3 | ||||
-rw-r--r-- | fs/xfs/scrub/common.h | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/health.c | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/nlinks.c | 573 | ||||
-rw-r--r-- | fs/xfs/scrub/nlinks.h | 28 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.c | 6 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.h | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.h | 50 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 104 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 12 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_symlink.c | 1 |
14 files changed, 792 insertions, 2 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index d54a346c4856..17fb0e9e2e9c 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -155,6 +155,8 @@ xfs-y += $(addprefix scrub/, \ health.o \ ialloc.o \ inode.o \ + iscan.o \ + nlinks.o \ parent.o \ refcount.o \ rmap.o \ @@ -171,7 +173,6 @@ xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \ ) xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \ - iscan.o \ quota.o \ quotacheck.o \ ) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 584d3437e0aa..7ac2b0a99aae 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -741,9 +741,10 @@ struct xfs_scrub_metadata { #define XFS_SCRUB_TYPE_HEALTHY 26 /* everything checked out ok */ #define XFS_SCRUB_TYPE_RTRMAPBT 27 /* realtime reverse mapping btree */ #define XFS_SCRUB_TYPE_RTREFCBT 28 /* realtime reference count btree */ +#define XFS_SCRUB_TYPE_NLINKS 29 /* inode link counts */ /* Number of scrub subcommands. */ -#define XFS_SCRUB_TYPE_NR 29 +#define XFS_SCRUB_TYPE_NR 30 /* * This special type code only applies to the vectored scrub implementation. 
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index d8e3ee1d24c4..dbd4145690b2 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -129,6 +129,7 @@ xchk_setup_quotacheck(struct xfs_scrub *sc) } #endif int xchk_setup_fscounters(struct xfs_scrub *sc); +int xchk_setup_nlinks(struct xfs_scrub *sc); void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa); int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno, diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 88a3969a9161..49a806400dac 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -109,6 +109,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = { [XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK }, [XFS_SCRUB_TYPE_RTRMAPBT] = { XHG_RT, XFS_SICK_RT_RMAPBT }, [XFS_SCRUB_TYPE_RTREFCBT] = { XHG_RT, XFS_SICK_RT_REFCNTBT }, + [XFS_SCRUB_TYPE_NLINKS] = { XHG_FS, XFS_SICK_FS_NLINKS }, }; /* Return the health status mask for this scrub type. */ diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c new file mode 100644 index 000000000000..d162cea228d3 --- /dev/null +++ b/fs/xfs/scrub/nlinks.c @@ -0,0 +1,573 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2021 Oracle. All Rights Reserved. + * Author: Darrick J. 
Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_iwalk.h" +#include "xfs_ialloc.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_ag.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/repair.h" +#include "scrub/array.h" +#include "scrub/iscan.h" +#include "scrub/nlinks.h" +#include "scrub/trace.h" + +/* + * Live Inode Link Count Checking + * ============================== + * + * Inode link counts are "summary" metadata, in the sense that they are + * computed as the number of directory entries referencing each file on the + * filesystem. Therefore, we compute the correct link counts by creating a + * shadow link count structure and walking every inode. + * + * Because we are scanning a live filesystem, it's possible that another thread + * will try to update the link counts for an inode that we've already scanned. + * This will cause our counts to be incorrect. Therefore, we hook all inode + * link count updates when the change is made to the incore inode. By + * shadowing transaction updates in this manner, live nlink check can ensure by + * locking the inode and the shadow structure that its own copies are not out + * of date. + * + * Note that we use srcu notifier hooks to minimize the overhead when live + * nlinks is /not/ running. + */ + +/* Set us up to scrub inode link counts. */ +int +xchk_setup_nlinks( + struct xfs_scrub *sc) +{ + sc->buf = kmem_zalloc(sizeof(struct xchk_nlinks), KM_NOFS | KM_MAYFAIL); + if (!sc->buf) + return -ENOMEM; + + return xchk_setup_fs(sc); +} + +/* Retrieve the shadow link count for the given inode. 
*/ +int +xchk_nlinks_get_shadow_count( + struct xchk_nlinks *xnc, + xfs_ino_t ino, + xfs_nlink_t *nlinks) +{ + int error; + + error = xfbma_get(xnc->nlinks, ino, nlinks); + if (error == -ENODATA) { + /* + * ENODATA means we tried to read beyond the end of the sparse + * array. This isn't a big deal, just zero the incore record + * and return that. + */ + *nlinks = 0; + return 0; + } + return error; +} + +/* Update incore link count information. Caller must hold the iscan lock. */ +static int +xchk_nlinks_update_incore( + struct xchk_nlinks *xnc, + struct xfs_inode *dp, + xfs_ino_t ino, + int delta) +{ + xfs_nlink_t nlinks; + int error; + + trace_xchk_nlinks_update_incore(xnc->sc->mp, dp ? dp->i_ino : NULLFSINO, + ino, delta, __return_address); + + if (!xnc->nlinks) + return 0; + + error = xchk_nlinks_get_shadow_count(xnc, ino, &nlinks); + if (error) + return error; + + nlinks += delta; + + error = xfbma_set(xnc->nlinks, ino, &nlinks); + if (error == -EFBIG) { + /* + * EFBIG means we tried to store data at too high a byte offset + * in the sparse array. IOWs, we cannot complete the check and + * must notify userspace that the check was incomplete. + */ + xchk_set_incomplete(xnc->sc); + error = -ECANCELED; + } + return error; +} + +/* + * Apply a link count change from the regular filesystem into our shadow link + * count structure. 
+ */ +static int +xchk_nlinks_mod_inode( + struct notifier_block *nb, + unsigned long arg, + void *data) +{ + struct xfs_nlink_mod_params *p = data; + struct xchk_nlinks *xnc; + int error; + + xnc = container_of(nb, struct xchk_nlinks, mod_hook); + + xchk_iscan_lock(&xnc->iscan); + if (!xchk_iscan_marked(&xnc->iscan, p->dp) || xnc->hook_dead) + goto out_unlock; + + error = xchk_nlinks_update_incore(xnc, p->dp, p->ino, p->delta); + if (error) + xnc->hook_dead = true; + +out_unlock: + xchk_iscan_unlock(&xnc->iscan); + return NOTIFY_DONE; +} + +struct xchk_walk_dir { + struct dir_context dir_iter; + struct xchk_nlinks *xnc; + struct xfs_inode *dp; +}; + +/* Bump the shadow link count for every inode referenced by this dir. */ +STATIC int +xchk_nlinks_walk_dir( + struct dir_context *dir_iter, + const char *name, + int namelen, + loff_t pos, + u64 ino, + unsigned type) +{ + struct xchk_walk_dir *xwd; + struct xchk_nlinks *xnc; + int error = -ECANCELED; + + xwd = container_of(dir_iter, struct xchk_walk_dir, dir_iter); + xnc = xwd->xnc; + + /* Update the shadow link counts if we haven't already failed. */ + xchk_iscan_lock(&xnc->iscan); + if (xnc->hook_dead) { + xchk_set_incomplete(xnc->sc); + goto out_unlock; + } + + error = xchk_nlinks_update_incore(xnc, xwd->dp, ino, 1); + if (error) { + xchk_set_incomplete(xnc->sc); + xnc->hook_dead = true; + } + +out_unlock: + xchk_iscan_unlock(&xnc->iscan); + return error; +} + +/* Bump the link counts of every entry in this directory. */ +STATIC int +xchk_nlinks_dir( + struct xchk_nlinks *xnc, + struct xfs_inode *dp) +{ + struct xfs_scrub *sc = xnc->sc; + struct xchk_walk_dir xwd = { + .dir_iter.actor = xchk_nlinks_walk_dir, + .dir_iter.pos = 0, + .xnc = xnc, + .dp = dp, + }; + loff_t oldpos; + size_t bufsize; + unsigned int lock_mode; + int error = 0; + + /* Lock out the VFS from changing this directory while we walk it. 
*/ + xfs_ilock(dp, XFS_IOLOCK_SHARED); + + /* + * The dotdot entry of an unlinked directory still points to the last + * parent, but the parent no longer links to this directory. Skip the + * directory to avoid overcounting. + */ + if (VFS_I(dp)->i_nlink == 0) + goto out; + + /* + * If there are any blocks, read-ahead block 0 as we're almost certain + * to have the next operation be a read there. This is how we + * guarantee that the directory's extent map has been loaded, if there + * is one. + */ + lock_mode = xfs_ilock_data_map_shared(dp); + if (dp->i_df.if_nextents > 0) + error = xfs_dir3_data_readahead(dp, 0, 0); + xfs_iunlock(dp, lock_mode); + if (error) + goto out; + + /* + * Bump link counts for every dirent we see. Userspace usually asks + * for a 32k buffer, so we will too. + */ + bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, dp->i_disk_size); + do { + oldpos = xwd.dir_iter.pos; + error = xfs_readdir(sc->tp, dp, &xwd.dir_iter, bufsize); + } while (!error && oldpos < xwd.dir_iter.pos); + +out: + xfs_iunlock(dp, XFS_IOLOCK_SHARED); + return error; +} + +/* If this looks like a valid pointer, count it. */ +static inline int +xchk_nlinks_metafile( + struct xchk_nlinks *xnc, + xfs_ino_t ino) +{ + if (!xfs_verify_ino(xnc->sc->mp, ino)) + return 0; + + return xchk_nlinks_update_incore(xnc, NULL, ino, 1); +} + +/* Bump the link counts of metadata files rooted in the superblock. 
*/ +STATIC int +xchk_nlinks_metafiles( + struct xchk_nlinks *xnc) +{ + struct xfs_mount *mp = xnc->sc->mp; + int error = -ECANCELED; + + xchk_iscan_lock(&xnc->iscan); + if (xnc->hook_dead) { + xchk_set_incomplete(xnc->sc); + goto out_unlock; + } + + error = xchk_nlinks_metafile(xnc, mp->m_sb.sb_rbmino); + if (error) + goto out_error; + + error = xchk_nlinks_metafile(xnc, mp->m_sb.sb_rsumino); + if (error) + goto out_error; + + error = xchk_nlinks_metafile(xnc, mp->m_sb.sb_uquotino); + if (error) + goto out_error; + + error = xchk_nlinks_metafile(xnc, mp->m_sb.sb_gquotino); + if (error) + goto out_error; + + error = xchk_nlinks_metafile(xnc, mp->m_sb.sb_pquotino); + +out_error: + if (error) { + xchk_set_incomplete(xnc->sc); + xnc->hook_dead = true; + } +out_unlock: + xchk_iscan_unlock(&xnc->iscan); + return error; +} + +/* Walk all directories and count inode links. */ +STATIC int +xchk_nlinks_collect( + struct xchk_nlinks *xnc) +{ + struct xfs_scrub *sc = xnc->sc; + struct xchk_iscan *iscan = &xnc->iscan; + struct xfs_inode *ip; + int flags = XFS_IGET_UNTRUSTED; + unsigned int retries = 20; + int error; + + /* Count the rt and quota files if they're rooted in the superblock. */ + if (!xfs_has_metadir(sc->mp)) { + error = xchk_nlinks_metafiles(xnc); + if (error) + return error; + } + + while (!(error = xchk_iscan_advance(iscan, sc->tp))) { + if (iscan->cursor_ino == NULLFSINO || + xchk_should_terminate(sc, &error)) + break; + + error = xfs_iget(sc->mp, sc->tp, iscan->cursor_ino, flags, 0, + &ip); + switch (error) { + case 0: + if (S_ISDIR(VFS_I(ip)->i_mode)) + error = xchk_nlinks_dir(xnc, ip); + xchk_irele(sc, ip); + if (error) + return error; + retries = 20; + break; + case -ENOENT: + /* + * It's possible that this inode has lost all of its + * links but hasn't yet been inactivated. Try to push + * it towards inactivation. 
+ */ + xfs_inodegc_flush(xnc->sc->mp); + fallthrough; + case -EINVAL: + /* + * We thought the inode was allocated, but iget failed + * to find it. This could be because the inobt lookup + * failed, or because there's an incore inode that + * thinks it's marked free. Either way, we back up + * one inode and try to advance the cursor. + */ + xchk_iscan_retry(iscan); + if (--retries == 0) { + xchk_set_incomplete(sc); + return -ECANCELED; + } + delay(HZ / 10); + break; + default: + return error; + } + } + + return error; +} + +/* Check the link count against an inode. */ +STATIC int +xchk_nlinks_compare_inode( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t ino, + void *data) +{ + struct xchk_nlinks *xnc = data; + struct xfs_inode *ip = NULL; + xfs_nlink_t live_nlink; + xfs_nlink_t ino_nlink = 0; + int error; + + error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, + &ip); + if (error == -ENOENT || error == -EINVAL) { + /* Inode wasn't found, so we'll compare against zero nlink. */ + error = 0; + } + if (error) + return error; + + xchk_iscan_lock(&xnc->iscan); + if (xnc->hook_dead) { + xchk_set_incomplete(xnc->sc); + error = -ECANCELED; + goto out_unlock; + } + error = xchk_nlinks_get_shadow_count(xnc, ino, &live_nlink); + if (error) { + xchk_set_incomplete(xnc->sc); + xnc->hook_dead = true; + goto out_unlock; + } + + if (ip) + ino_nlink = VFS_I(ip)->i_nlink; + + if (live_nlink != ino_nlink) { + trace_xchk_nlinks_compare_inode(mp, ino, ino_nlink, live_nlink); + xchk_ino_set_corrupt(xnc->sc, ino); + error = -ECANCELED; + } + +out_unlock: + xchk_iscan_unlock(&xnc->iscan); + if (ip) { + xfs_iunlock(ip, XFS_ILOCK_SHARED); + xchk_irele(xnc->sc, ip); + } + return error; +} + +/* + * Walk all the observed link counts, and make sure there's a matching incore + * inode and that its counts match ours. 
+ */ +STATIC int +xchk_nlinks_walk_observations( + struct xchk_nlinks *xnc) +{ + struct xfs_scrub *sc = xnc->sc; + uint64_t nr = 0; + xfs_nlink_t nlink; + int error; + + if (!xnc->nlinks) + return 0; + + xchk_iscan_lock(&xnc->iscan); + while (!(error = xfbma_iter_get(xnc->nlinks, &nr, &nlink))) { + xchk_iscan_unlock(&xnc->iscan); + + if (xchk_should_terminate(xnc->sc, &error)) + return error; + + error = xchk_nlinks_compare_inode(sc->mp, sc->tp, nr - 1, xnc); + if (error) + return error; + + xchk_iscan_lock(&xnc->iscan); + } + xchk_iscan_unlock(&xnc->iscan); + + /* ENODATA means we hit the end of the array. */ + if (error == -ENODATA) + return 0; + + return error; +} + +/* Compare the link counts we observed against the live information. */ +STATIC int +xchk_nlinks_compare_counts( + struct xchk_nlinks *xnc) +{ + struct xfs_scrub *sc = xnc->sc; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return 0; + + error = xfs_iwalk(sc->mp, sc->tp, 0, XFS_IWALK_METADIR, + xchk_nlinks_compare_inode, 0, xnc); + if (error) + return error; + + /* Walk all the observed link counts and compare to the incore ones. */ + return xchk_nlinks_walk_observations(xnc); +} + +/* Tear down everything associated with a nlinks check. */ +static void +xchk_nlinks_teardown_scan( + struct xchk_nlinks *xnc) +{ + /* Discourage any hook functions that might be running. */ + xchk_iscan_lock(&xnc->iscan); + xnc->hook_dead = true; + xchk_iscan_unlock(&xnc->iscan); + + /* + * Unhook ourselves from the bumplink/droplink code before freeing the + * shadow link count array. hook_dead was set above, so a mod hook + * that is still running will skip its update, and removing the hook + * before calling xfbma_destroy keeps later callers away from the + * freed array. No hooks should be running by the time this function + * completes. 
+ */ + xfs_hook_del(&xnc->sc->mp->m_nlink_mod_hooks, &xnc->mod_hook); + + xfbma_destroy(xnc->nlinks); + xnc->nlinks = NULL; + + xchk_iscan_finish(&xnc->iscan); + xnc->sc = NULL; +} + +/* + * Scan all inodes in the entire filesystem to generate link count data. If + * the scan is successful, the counts will be left alive for a repair. If any + * error occurs, we'll tear everything down. + */ +STATIC int +xchk_nlinks_setup_scan( + struct xfs_scrub *sc, + struct xchk_nlinks *xnc) +{ + int error; + + ASSERT(xnc->sc == NULL); + xnc->sc = sc; + + xnc->hook_dead = false; + xchk_iscan_start(&xnc->iscan); + + error = -ENOMEM; + xnc->nlinks = xfbma_init("link counts", sizeof(xfs_nlink_t)); + if (!xnc->nlinks) + goto out_teardown; + + /* + * Hook into the bumplink/droplink code. The hook only triggers for + * inodes that were already scanned, and the scanner thread takes each + * inode's ILOCK, which means that any in-progress inode updates will + * finish before we can scan the inode. + */ + error = xfs_hook_add(&sc->mp->m_nlink_mod_hooks, &xnc->mod_hook, + xchk_nlinks_mod_inode); + if (error) + goto out_teardown; + + /* Use deferred cleanup to pass the inode link count data to repair. */ + sc->buf_cleanup = (void (*)(void *))xchk_nlinks_teardown_scan; + return 0; + +out_teardown: + xchk_nlinks_teardown_scan(xnc); + return error; +} + +/* Scrub the link count of all inodes on the filesystem. */ +int +xchk_nlinks( + struct xfs_scrub *sc) +{ + struct xchk_nlinks *xnc = sc->buf; + int error = 0; + + /* Check link counts on the live filesystem. */ + error = xchk_nlinks_setup_scan(sc, xnc); + if (error) + return error; + + /* Walk all inodes, picking up link count information. */ + error = xchk_nlinks_collect(xnc); + if (!xchk_xref_process_error(sc, 0, 0, &error)) + return error; + + /* Compare link counts. 
*/ + error = xchk_nlinks_compare_counts(xnc); + if (!xchk_xref_process_error(sc, 0, 0, &error)) + return error; + + return 0; +} diff --git a/fs/xfs/scrub/nlinks.h b/fs/xfs/scrub/nlinks.h new file mode 100644 index 000000000000..27c6e0abba03 --- /dev/null +++ b/fs/xfs/scrub/nlinks.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2021 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_SCRUB_NLINKS_H__ +#define __XFS_SCRUB_NLINKS_H__ + +/* Live link count control structure. */ +struct xchk_nlinks { + struct xfs_scrub *sc; + + /* Shadow link count data. */ + struct xfbma *nlinks; + + struct xchk_iscan iscan; + + /* Hooks into bumplink/droplink code. */ + struct notifier_block mod_hook; + + /* Something failed during live tracking. */ + bool hook_dead; +}; + +int xchk_nlinks_get_shadow_count(struct xchk_nlinks *xnc, xfs_ino_t ino, + xfs_nlink_t *nlinks); + +#endif /* __XFS_SCRUB_NLINKS_H__ */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 27a0b3502200..44890551f73e 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -405,6 +405,12 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .has = xfs_has_rtreflink, .repair = xrep_rtrefcountbt, }, + [XFS_SCRUB_TYPE_NLINKS] = { /* inode link counts */ + .type = ST_FS, + .setup = xchk_setup_nlinks, + .scrub = xchk_nlinks, + .repair = xrep_notsupported, + }, }; /* This isn't a stable feature, warn once per day. 
*/ diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 6d66a335ff44..8b36e180bc10 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -202,6 +202,7 @@ xchk_quotacheck(struct xfs_scrub *sc) } #endif int xchk_fscounters(struct xfs_scrub *sc); +int xchk_nlinks(struct xfs_scrub *sc); /* cross-referencing helpers */ void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno, diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 89d9c2b27b4d..7892b3e1d140 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -903,6 +903,56 @@ TRACE_EVENT(xchk_rtsum_record_free, __entry->v) ) +TRACE_EVENT(xchk_nlinks_update_incore, + TP_PROTO(struct xfs_mount *mp, xfs_ino_t dir, xfs_ino_t ino, int delta, + void *ret_ip), + TP_ARGS(mp, dir, ino, delta, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dir) + __field(xfs_ino_t, ino) + __field(int, delta) + __field(void *, ret_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->dir = dir; + __entry->ino = ino; + __entry->delta = delta; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d dir 0x%llx ino 0x%llx nlink_delta %d ret %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dir, __entry->ino, __entry->delta, __entry->ret_ip) +) + +DECLARE_EVENT_CLASS(xchk_nlink_diff_class, + TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_nlink_t ondisk, + xfs_nlink_t saw), + TP_ARGS(mp, ino, ondisk, saw), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_nlink_t, ondisk) + __field(xfs_nlink_t, saw) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->ino = ino; + __entry->ondisk = ondisk; + __entry->saw = saw; + ), + TP_printk("dev %d:%d ino 0x%llx nlink %u saw_nlink %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, __entry->ondisk, __entry->saw) +); +#define DEFINE_SCRUB_NLINK_DIFF_EVENT(name) \ +DEFINE_EVENT(xchk_nlink_diff_class, name, \ + TP_PROTO(struct xfs_mount *mp, 
xfs_ino_t ino, xfs_nlink_t ondisk, \ + xfs_nlink_t saw), \ + TP_ARGS(mp, ino, ondisk, saw)) +DEFINE_SCRUB_NLINK_DIFF_EVENT(xchk_nlinks_compare_inode); + /* repair tracepoints */ #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3b3e7390e66d..6a4fc79c7f1d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -776,6 +776,16 @@ xfs_create( goto out_trans_cancel; /* + * Create ip with a reference from dp, and add '.' and '..' references + * if it's a directory. + */ + xfs_inode_nlink_delta(dp, ip, 1); + if (is_dir) { + xfs_inode_nlink_delta(ip, ip, 1); + xfs_inode_nlink_delta(ip, dp, 1); + } + + /* * If this is a synchronous mount, make sure that the * create transaction goes to disk before returning to * the user. @@ -1070,6 +1080,7 @@ xfs_link( error = xfs_dir_link_existing_child(tp, resblks, tdp, target_name, sip); if (error) goto error_return; + xfs_inode_nlink_delta(tdp, sip, 1); /* * If this is a synchronous mount, make sure that the @@ -2211,6 +2222,16 @@ xfs_remove( goto out_trans_cancel; /* + * Drop the link from dp to ip, and if ip was a directory, remove the + * '.' and '..' references since we freed the directory. + */ + xfs_inode_nlink_delta(dp, ip, -1); + if (S_ISDIR(VFS_I(ip)->i_mode)) { + xfs_inode_nlink_delta(ip, dp, -1); + xfs_inode_nlink_delta(ip, ip, -1); + } + + /* * If this is a synchronous mount, make sure that the * remove transaction goes to disk before returning to * the user. @@ -2337,6 +2358,67 @@ xfs_rename_alloc_whiteout( return 0; } +static inline void +xfs_rename_call_nlink_hooks( + struct xfs_inode *src_dp, + struct xfs_inode *src_ip, + struct xfs_inode *target_dp, + struct xfs_inode *target_ip, + struct xfs_inode *wip, + unsigned int flags) +{ + /* If we added a whiteout, add the reference from src_dp. */ + if (wip) + xfs_inode_nlink_delta(src_dp, wip, 1); + + /* Move the src_ip reference from src_dp to target_dp. 
*/ + xfs_inode_nlink_delta(src_dp, src_ip, -1); + xfs_inode_nlink_delta(target_dp, src_ip, 1); + + /* + * If src_ip is a dir, move its '..' reference from src_dp to + * target_dp. + */ + if (S_ISDIR(VFS_I(src_ip)->i_mode)) { + xfs_inode_nlink_delta(src_ip, src_dp, -1); + xfs_inode_nlink_delta(src_ip, target_dp, 1); + } + + if (!target_ip) + return; + + if (flags & RENAME_EXCHANGE) { + /* Move the target_ip reference from target_dp to src_dp. */ + xfs_inode_nlink_delta(target_dp, target_ip, -1); + xfs_inode_nlink_delta(src_dp, target_ip, 1); + + /* + * If target_ip is a dir, move its '..' reference from + * target_dp to src_dp. + */ + if (S_ISDIR(VFS_I(target_ip)->i_mode)) { + xfs_inode_nlink_delta(target_ip, target_dp, -1); + xfs_inode_nlink_delta(target_ip, src_dp, 1); + } + + return; + } + + /* Drop target_ip's reference from target_dp. */ + xfs_inode_nlink_delta(target_dp, target_ip, -1); + + if (!S_ISDIR(VFS_I(target_ip)->i_mode)) + return; + + /* + * If target_ip was a dir, drop the '.' and '..' references since that + * was the last reference. + */ + ASSERT(VFS_I(target_ip)->i_nlink == 0); + xfs_inode_nlink_delta(target_ip, target_dp, -1); + xfs_inode_nlink_delta(target_ip, target_ip, -1); +} + /* * xfs_rename */ @@ -2477,6 +2559,9 @@ xfs_rename( VFS_I(wip)->i_state &= ~I_LINKABLE; } + xfs_rename_call_nlink_hooks(src_dp, src_ip, target_dp, target_ip, wip, + flags); + error = xfs_finish_rename(tp); if (wip) xfs_irele(wip); @@ -2948,3 +3033,22 @@ xfs_is_always_cow_inode( { return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount); } + +/* Call a hook to capture nlink updates in real time. 
*/ +#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) +void +xfs_inode_nlink_delta( + struct xfs_inode *dp, + struct xfs_inode *ip, + int delta) +{ + struct xfs_nlink_mod_params p; + struct xfs_mount *mp = ip->i_mount; + + p.dp = dp; + p.ino = ip->i_ino; + p.delta = delta; + + xfs_hook_call(&mp->m_nlink_mod_hooks, 0, &p); +} +#endif diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7801da45c20e..c07b8bea0be4 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -541,4 +541,11 @@ int xfs_icreate_dqalloc(const struct xfs_icreate_args *args, int xfs_file_cow_around(struct xfs_inode *ip, loff_t pos, long long int count); +#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) +void xfs_inode_nlink_delta(struct xfs_inode *dp, struct xfs_inode *ip, + int delta); +#else +# define xfs_inode_nlink_delta(dp, ip, delta) ((void)0) +#endif + #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 500f1ef6513f..a161aabd8438 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -258,8 +258,20 @@ typedef struct xfs_mount { * while a repair freeze is in progress. */ struct mutex m_scrub_freeze; + +#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) + /* online nlink check stuff */ + struct xfs_hook_chain m_nlink_mod_hooks; +#endif } xfs_mount_t; +/* Parameters for xfs_bumplink/droplink hook. */ +struct xfs_nlink_mod_params { + struct xfs_inode *dp; + xfs_ino_t ino; + int delta; +}; + #define M_IGEO(mp) (&(mp)->m_ino_geo) /* diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 4d886b50ec92..d8c03d84b78a 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1992,6 +1992,10 @@ static int xfs_init_fs_context( mp->m_logbsize = -1; mp->m_allocsize_log = 16; /* 64k */ +#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) + xfs_hook_init(&mp->m_nlink_mod_hooks); +#endif + /* * Copy binary VFS mount flags we are interested in. 
*/ diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 177987c3716f..239d5b23e9c4 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -332,6 +332,7 @@ xfs_symlink( goto out_trans_cancel; xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + xfs_inode_nlink_delta(dp, ip, 1); /* * If this is a synchronous mount, make sure that the |