summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Darrick J. Wong <djwong@kernel.org> 2021-03-17 09:27:17 -0700
committer Darrick J. Wong <djwong@kernel.org> 2021-03-25 17:08:55 -0700
commit 8e45025becb250087a4bb20e82f549a9297ec15f (patch)
tree 1b2472d66b9acd7895fc9a8249fac538f01dcd2d
parent f8fc5b1afb08260b6f758fcad67892fca0b4bd93 (diff)
xfs: teach scrub to check file nlinks
Copy-pasta the online quotacheck code to check inode link counts too.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- fs/xfs/Makefile 1
-rw-r--r-- fs/xfs/libxfs/xfs_fs.h 3
-rw-r--r-- fs/xfs/scrub/common.h 1
-rw-r--r-- fs/xfs/scrub/health.c 1
-rw-r--r-- fs/xfs/scrub/nlinks.c 569
-rw-r--r-- fs/xfs/scrub/nlinks.h 32
-rw-r--r-- fs/xfs/scrub/scrub.c 6
-rw-r--r-- fs/xfs/scrub/scrub.h 1
-rw-r--r-- fs/xfs/scrub/trace.h 27
-rw-r--r-- fs/xfs/xfs_inode.c 104
-rw-r--r-- fs/xfs/xfs_inode.h 7
-rw-r--r-- fs/xfs/xfs_mount.h 12
-rw-r--r-- fs/xfs/xfs_super.c 4
-rw-r--r-- fs/xfs/xfs_symlink.c 1
14 files changed, 768 insertions, 1 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index fe2ee79e8e93..80679edab5ae 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -155,6 +155,7 @@ xfs-y += $(addprefix scrub/, \
health.o \
ialloc.o \
inode.o \
+ nlinks.o \
parent.o \
refcount.o \
rmap.o \
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 9bc5e324284b..0ba6b8528415 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -744,9 +744,10 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_HEALTHY 26 /* everything checked out ok */
#define XFS_SCRUB_TYPE_RTRMAPBT 27 /* realtime reverse mapping btree */
#define XFS_SCRUB_TYPE_RTREFCBT 28 /* realtime reference count btree */
+#define XFS_SCRUB_TYPE_NLINKS 29 /* inode link counts */
/* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR 29
+#define XFS_SCRUB_TYPE_NR 30
/*
* This special type code only applies to the vectored scrub implementation.
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 08bf69610288..3b6b7b18cddf 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -129,6 +129,7 @@ xchk_setup_quotacheck(struct xfs_scrub *sc)
}
#endif
int xchk_setup_fscounters(struct xfs_scrub *sc);
+int xchk_setup_nlinks(struct xfs_scrub *sc);
void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa);
int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno,
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index 1aeafdc6b8f7..6973384dc01e 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -109,6 +109,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK },
[XFS_SCRUB_TYPE_RTRMAPBT] = { XHG_RT, XFS_SICK_RT_RMAPBT },
[XFS_SCRUB_TYPE_RTREFCBT] = { XHG_RT, XFS_SICK_RT_REFCNTBT },
+ [XFS_SCRUB_TYPE_NLINKS] = { XHG_FS, XFS_SICK_FS_NLINKS },
};
/* Return the health status mask for this scrub type. */
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
new file mode 100644
index 000000000000..1a841ca2e457
--- /dev/null
+++ b/fs/xfs/scrub/nlinks.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_iwalk.h"
+#include "xfs_ialloc.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/repair.h"
+#include "scrub/array.h"
+#include "scrub/nlinks.h"
+#include "scrub/trace.h"
+
+/*
+ * Live Inode Link Count Checking
+ * ==============================
+ *
+ * Inode link counts are "summary" metadata, in the sense that they are
+ * computed as the number of directory entries referencing each file on the
+ * filesystem. Therefore, we compute the correct link counts by creating a
+ * shadow link count structure and walking every inode.
+ *
+ * Because we are scanning a live filesystem, it's possible that another thread
+ * will try to update the link counts for an inode that we've already scanned.
+ * This will cause our counts to be incorrect. Therefore, we hook all inode
+ * link count updates when the change is made to the incore inode. By
+ * shadowing transaction updates in this manner, live nlink check can ensure by
+ * locking the inode and the shadow structure that its own copies are not out
+ * of date.
+ *
+ * Note that we use srcu notifier hooks to minimize the overhead when live
+ * nlinks is /not/ running.
+ */
+
+/*
+ * Set us up to scrub inode link counts.
+ *
+ * Allocates the link count control structure with kmem_zalloc(), hangs it off
+ * sc->buf, and then hands over to xchk_setup_fs().  Returns -ENOMEM if the
+ * allocation fails, otherwise whatever xchk_setup_fs() returns.
+ */
+int
+xchk_setup_nlinks(
+	struct xfs_scrub	*sc)
+{
+	struct xchk_nlinks	*xnc;
+
+	xnc = kmem_zalloc(sizeof(*xnc), KM_NOFS | KM_MAYFAIL);
+	sc->buf = xnc;
+	if (xnc == NULL)
+		return -ENOMEM;
+
+	return xchk_setup_fs(sc);
+}
+
+/*
+ * Retrieve the shadow link count for the given inode.
+ *
+ * The count is read from the sparse shadow array into *nlinks.  A lookup that
+ * ends in -ENODATA is reported as a count of zero with a return value of 0;
+ * any other result of xfbma_get() is returned unchanged.
+ */
+int
+xchk_nlinks_get_shadow_count(
+	struct xchk_nlinks	*xnc,
+	xfs_ino_t		ino,
+	xfs_nlink_t		*nlinks)
+{
+	int			ret = xfbma_get(xnc->nlinks, ino, nlinks);
+
+	if (ret != -ENODATA)
+		return ret;
+
+	/*
+	 * ENODATA means we tried to read beyond the end of the sparse array.
+	 * That is not a problem; treat the missing record as a zero count.
+	 */
+	*nlinks = 0;
+	return 0;
+}
+
+/*
+ * Update incore link count information.  Caller must hold the xnc lock.
+ *
+ * Adds @delta to the shadow link count recorded for @ino.  Returns 0 without
+ * doing anything if there is no shadow array.  If the array refuses the store
+ * with -EFBIG, the scrub is flagged incomplete and -ECANCELED is returned;
+ * every other error is passed straight back to the caller.
+ */
+static int
+xchk_nlinks_update_incore(
+	struct xchk_nlinks	*xnc,
+	xfs_ino_t		ino,
+	int64_t			delta)
+{
+	xfs_nlink_t		count;
+	int			ret;
+
+	if (xnc->nlinks == NULL)
+		return 0;
+
+	ret = xchk_nlinks_get_shadow_count(xnc, ino, &count);
+	if (ret)
+		return ret;
+
+	count += delta;
+
+	ret = xfbma_set(xnc->nlinks, ino, &count);
+	if (ret != -EFBIG)
+		return ret;
+
+	/*
+	 * EFBIG means we tried to store data at too high a byte offset in the
+	 * sparse array.  We cannot complete the check, so tell userspace that
+	 * the check was incomplete.
+	 */
+	xchk_set_incomplete(xnc->sc);
+	return -ECANCELED;
+}
+
+/*
+ * Notifier callback that applies a link count change from the regular
+ * filesystem to our shadow link count structure.
+ *
+ * @data points to the struct xfs_nlink_mod_params describing the change; @arg
+ * is not used.  The change is ignored if the directory's inode number lies
+ * beyond the scan cursor (presumably because the scan has not reached that
+ * directory yet) or if live tracking has already failed.  A failed shadow
+ * update marks live tracking as dead.  Always returns NOTIFY_DONE.
+ */
+static int
+xchk_nlinks_mod_inode(
+	struct notifier_block		*nb,
+	unsigned long			arg,
+	void				*data)
+{
+	struct xfs_nlink_mod_params	*params = data;
+	struct xchk_nlinks		*xnc;
+
+	xnc = container_of(nb, struct xchk_nlinks, mod_hook);
+
+	mutex_lock(&xnc->lock);
+	if (params->dir <= xnc->last_ino && !xnc->hook_dead) {
+		if (xchk_nlinks_update_incore(xnc, params->ino, params->delta))
+			xnc->hook_dead = true;
+	}
+	mutex_unlock(&xnc->lock);
+
+	return NOTIFY_DONE;
+}
+
+struct xchk_walk_dir { /* readdir context used while counting one directory's entries */
+ struct dir_context dir_iter; /* embedded so the actor can container_of() back to this struct */
+ struct xchk_nlinks *xnc; /* scan state whose shadow link counts the actor bumps */
+};
+
+/*
+ * Directory iteration actor: bump the shadow link count of the inode that one
+ * directory entry points to.
+ *
+ * Only @dir_iter and @ino are used.  If live tracking has already failed, the
+ * scrub is marked incomplete and -ECANCELED is returned.  If the shadow
+ * update itself fails, the scrub is marked incomplete, live tracking is
+ * marked dead, and the error is returned.
+ */
+STATIC int
+xchk_nlinks_walk_dir(
+	struct dir_context	*dir_iter,
+	const char		*name,
+	int			namelen,
+	loff_t			pos,
+	u64			ino,
+	unsigned		type)
+{
+	struct xchk_walk_dir	*walk;
+	struct xchk_nlinks	*xnc;
+	int			ret;
+
+	walk = container_of(dir_iter, struct xchk_walk_dir, dir_iter);
+	xnc = walk->xnc;
+
+	mutex_lock(&xnc->lock);
+	if (xnc->hook_dead) {
+		/* Something already went wrong; don't trust further counts. */
+		xchk_set_incomplete(xnc->sc);
+		ret = -ECANCELED;
+	} else {
+		ret = xchk_nlinks_update_incore(xnc, ino, 1);
+		if (ret) {
+			xchk_set_incomplete(xnc->sc);
+			xnc->hook_dead = true;
+		}
+	}
+	mutex_unlock(&xnc->lock);
+
+	return ret;
+}
+
+/*
+ * Bump the link counts of every entry in this directory.
+ *
+ * The directory is read with IOLOCK_SHARED held from start to finish.  Each
+ * entry is fed to xchk_nlinks_walk_dir().  Returns the first error from the
+ * readahead or from xfs_readdir(), or 0 once readdir stops making progress.
+ */
+STATIC int
+xchk_nlinks_dir(
+	struct xchk_nlinks	*xnc,
+	struct xfs_inode	*ip)
+{
+	struct xchk_walk_dir	walk = {
+		.dir_iter.actor	= xchk_nlinks_walk_dir,
+		.dir_iter.pos	= 0,
+		.xnc		= xnc,
+	};
+	struct xfs_scrub	*sc = xnc->sc;
+	size_t			bufsize;
+	unsigned int		map_lock;
+	int			error = 0;
+
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+
+	/*
+	 * If there are any blocks, read-ahead block 0 as we're almost certain
+	 * to have the next operation be a read there.  This is how we
+	 * guarantee that the directory's extent map has been loaded, if there
+	 * is one.
+	 */
+	map_lock = xfs_ilock_data_map_shared(ip);
+	if (ip->i_df.if_nextents > 0)
+		error = xfs_dir3_data_readahead(ip, 0, 0);
+	xfs_iunlock(ip, map_lock);
+
+	if (!error) {
+		/*
+		 * Size the readdir buffer as the smaller of the usual readdir
+		 * buffer size and the directory size, then keep reading until
+		 * an error occurs or the position stops advancing.
+		 */
+		bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
+				ip->i_d.di_size);
+		for (;;) {
+			loff_t	before = walk.dir_iter.pos;
+
+			error = xfs_readdir(sc->tp, ip, &walk.dir_iter,
+					bufsize);
+			if (error || walk.dir_iter.pos <= before)
+				break;
+		}
+	}
+
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+	return error;
+}
+
+/*
+ * Bump the link count of this metadata file.
+ *
+ * Nothing is counted when the filesystem has the metadir feature (presumably
+ * because such files are then reachable through directory entries and get
+ * counted by the directory walk).  Otherwise one reference is added to the
+ * file's shadow link count while holding the xnc lock.
+ *
+ * If live tracking has already failed (hook_dead), the update is skipped and
+ * 0 is returned; xchk_nlinks_compare_inode() checks hook_dead itself and marks
+ * the scrub incomplete.  If the shadow update fails, the scrub is marked
+ * incomplete, live tracking is marked dead, and the error is returned.
+ */
+STATIC int
+xchk_nlinks_metafile(
+	struct xchk_nlinks	*xnc,
+	struct xfs_inode	*ip)
+{
+	/* Must start at zero: the hook_dead path below never assigns it. */
+	int			error = 0;
+
+	if (xfs_sb_version_hasmetadir(&xnc->sc->mp->m_sb))
+		return 0;
+
+	mutex_lock(&xnc->lock);
+	if (xnc->hook_dead)
+		goto out_unlock;
+
+	error = xchk_nlinks_update_incore(xnc, ip->i_ino, 1);
+	if (error) {
+		xchk_set_incomplete(xnc->sc);
+		xnc->hook_dead = true;
+	}
+
+out_unlock:
+	mutex_unlock(&xnc->lock);
+	return error;
+}
+
+/*
+ * Advance ino to the next inode that the inobt thinks is allocated, being
+ * careful to jump to the next AG.  Advancing ino effectively means that we've
+ * pushed the nlinks scan forward, so set the nlinks cursor to (ino - 1) so
+ * that the link count hook starts capturing updates for that range once we
+ * release the AGI buffer.
+ *
+ * On return, *ino is NULLFSINO if its AG number is at or past sb_agcount.
+ * A -EAGAIN from xfs_iwalk_find_next() restarts the search using whatever AG
+ * *ino now falls in; any other result is returned to the caller after the AGI
+ * buffer has been released.
+ */
+STATIC int
+xchk_nlinks_advance(
+	struct xchk_nlinks	*xnc,
+	xfs_ino_t		*ino)
+{
+	struct xfs_scrub	*sc = xnc->sc;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_buf		*agi_bp;
+	int			error;
+
+	for (;;) {
+		xfs_agnumber_t	agno = XFS_INO_TO_AGNO(mp, *ino);
+
+		if (agno >= mp->m_sb.sb_agcount) {
+			*ino = NULLFSINO;
+			return 0;
+		}
+
+		error = xfs_ialloc_read_agi(mp, sc->tp, agno, &agi_bp);
+		if (error)
+			return error;
+
+		error = xfs_iwalk_find_next(mp, sc->tp, agi_bp, ino);
+
+		/*
+		 * Update the nlinks scan cursor so that the nlink hooks will
+		 * begin to capture link count updates being made by ongoing
+		 * transactions.  This happens before the AGI is released.
+		 */
+		mutex_lock(&xnc->lock);
+		xnc->last_ino = *ino - 1;
+		mutex_unlock(&xnc->lock);
+
+		xfs_trans_brelse(sc->tp, agi_bp);
+
+		if (error != -EAGAIN)
+			return error;
+	}
+}
+
+/*
+ * Walk all directories and count inode links.
+ *
+ * Repeatedly asks xchk_nlinks_advance() for the next inode until it reports
+ * NULLFSINO or an error.  Directories are handed to xchk_nlinks_dir(), and
+ * regular files accepted by xfs_is_metadata_inode() are handed to
+ * xchk_nlinks_metafile(); every other inode is grabbed and released without
+ * further work.  An inode that cannot be grabbed (-ENOENT or -EINVAL) is
+ * retried; after 20 consecutive failures the scrub is marked incomplete and
+ * -ECANCELED is returned.
+ */
+STATIC int
+xchk_nlinks_collect(
+ struct xchk_nlinks *xnc)
+{
+ struct xfs_scrub *sc = xnc->sc;
+ struct xfs_inode *ip;
+ xfs_ino_t ino = 0;
+ int flags = XFS_IGET_UNTRUSTED;
+ unsigned int retries = 20; /* consecutive iget failures still tolerated */
+ int error;
+
+ while (!(error = xchk_nlinks_advance(xnc, &ino)) && ino != NULLFSINO) {
+ if (xchk_should_terminate(sc, &error))
+ break;
+
+ error = xfs_iget(sc->mp, sc->tp, ino, flags, 0, &ip);
+ switch (error) {
+ case 0:
+ if (S_ISDIR(VFS_I(ip)->i_mode))
+ error = xchk_nlinks_dir(xnc, ip);
+ else if (S_ISREG(VFS_I(ip)->i_mode) &&
+ xfs_is_metadata_inode(ip))
+ error = xchk_nlinks_metafile(xnc, ip);
+ xchk_irele(sc, ip); /* drop the inode before looking at error */
+ if (error)
+ return error;
+ retries = 20; /* success: reset the failure budget */
+ break;
+ case -ENOENT:
+ /*
+ * It's possible that this inode has lost all of its
+ * links but hasn't yet been inactivated. Try to push
+ * it towards inactivation.
+ */
+ xfs_inodegc_flush_ino(xnc->sc->mp, ino);
+ /* fall through */
+ case -EINVAL:
+ /*
+ * We thought the inode was allocated, but iget failed
+ * to find it. This could be because the inobt lookup
+ * failed, or because there's an incore inode that
+ * thinks it's marked free. Either way, we back up
+ * one inode and try to advance the cursor.
+ */
+ ino--;
+ if (--retries == 0) {
+ xchk_set_incomplete(sc);
+ return -ECANCELED;
+ }
+ delay(HZ / 10); /* pause before asking for this inode again */
+ break;
+ default:
+ return error;
+ }
+ }
+ return error;
+}
+
+/*
+ * Check the link count against an inode.
+ *
+ * @data is the struct xchk_nlinks for this scan.  The inode is grabbed with
+ * ILOCK_SHARED; if it cannot be found (-ENOENT or -EINVAL) the shadow count
+ * is compared against zero instead.  A mismatch is traced, the inode is
+ * marked corrupt, and -ECANCELED is returned.  -ECANCELED is also returned,
+ * with the scrub marked incomplete, if live tracking has failed.
+ */
+STATIC int
+xchk_nlinks_compare_inode(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	xfs_ino_t		ino,
+	void			*data)
+{
+	struct xchk_nlinks	*xnc = data;
+	struct xfs_inode	*ip = NULL;
+	xfs_nlink_t		observed;
+	xfs_nlink_t		actual = 0;
+	int			error;
+
+	error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED,
+			&ip);
+	switch (error) {
+	case 0:
+		break;
+	case -ENOENT:
+	case -EINVAL:
+		/* Inode wasn't found, so we'll compare against zero nlink. */
+		error = 0;
+		break;
+	default:
+		return error;
+	}
+
+	mutex_lock(&xnc->lock);
+	if (xnc->hook_dead) {
+		xchk_set_incomplete(xnc->sc);
+		error = -ECANCELED;
+	} else {
+		error = xchk_nlinks_get_shadow_count(xnc, ino, &observed);
+		if (error) {
+			xchk_set_incomplete(xnc->sc);
+			xnc->hook_dead = true;
+		} else {
+			if (ip != NULL)
+				actual = VFS_I(ip)->i_nlink;
+
+			if (observed != actual) {
+				trace_xchk_nlinks_compare_inode(mp, ino,
+						actual, observed);
+				xchk_ino_set_corrupt(xnc->sc, ino);
+				error = -ECANCELED;
+			}
+		}
+	}
+	mutex_unlock(&xnc->lock);
+
+	if (ip != NULL) {
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+		xchk_irele(xnc->sc, ip);
+	}
+	return error;
+}
+
+/*
+ * Walk all the observed link counts, and make sure there's a matching incore
+ * inode and that its counts match ours.
+ *
+ * The xnc lock is held only while fetching the next record from the shadow
+ * array; it is dropped around the termination check and the comparison.  The
+ * inode number passed to the comparison is (nr - 1); presumably the iterator
+ * leaves nr one past the record it just returned.  Returns 0 when the
+ * iterator reports -ENODATA, otherwise the first error encountered.
+ */
+STATIC int
+xchk_nlinks_walk_observations(
+	struct xchk_nlinks	*xnc)
+{
+	struct xfs_scrub	*sc = xnc->sc;
+	uint64_t		cursor = 0;
+	xfs_nlink_t		count;
+	int			error;
+
+	if (xnc->nlinks == NULL)
+		return 0;
+
+	mutex_lock(&xnc->lock);
+	for (;;) {
+		error = xfbma_iter_get(xnc->nlinks, &cursor, &count);
+		if (error)
+			break;
+		mutex_unlock(&xnc->lock);
+
+		if (xchk_should_terminate(xnc->sc, &error))
+			return error;
+
+		error = xchk_nlinks_compare_inode(sc->mp, sc->tp, cursor - 1,
+				xnc);
+		if (error)
+			return error;
+
+		mutex_lock(&xnc->lock);
+	}
+	mutex_unlock(&xnc->lock);
+
+	/* ENODATA means we hit the end of the array. */
+	return error == -ENODATA ? 0 : error;
+}
+
+/*
+ * Compare the link counts we observed against the live information.
+ *
+ * Skipped entirely (returning 0) if the scrub has already been flagged
+ * corrupt.  Otherwise every inode found by xfs_iwalk() is compared first, and
+ * then every record in the shadow array is compared as well.
+ */
+STATIC int
+xchk_nlinks_compare_counts(
+	struct xchk_nlinks	*xnc)
+{
+	struct xfs_scrub	*sc = xnc->sc;
+	int			error = 0;
+
+	if (!(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
+		error = xfs_iwalk(sc->mp, sc->tp, 0, XFS_IWALK_METADIR,
+				xchk_nlinks_compare_inode, 0, xnc);
+
+		/* Walk all the observed link counts and compare to the incore ones. */
+		if (!error)
+			error = xchk_nlinks_walk_observations(xnc);
+	}
+
+	return error;
+}
+
+/*
+ * Tear down everything associated with a nlinks check.
+ *
+ * This is also called by xchk_nlinks_setup_scan() to unwind a partially
+ * initialized scan, so it must cope with a shadow array that was never
+ * created.
+ */
+static void
+xchk_nlinks_teardown_scan(
+	struct xchk_nlinks	*xnc)
+{
+	/* Discourage any hook functions that might be running. */
+	mutex_lock(&xnc->lock);
+	xnc->hook_dead = true;
+	mutex_unlock(&xnc->lock);
+
+	/*
+	 * Detach the link count hook before freeing the shadow data so that
+	 * the hook function cannot get its hands on an array that is being
+	 * destroyed.  No hooks should be running by the time this function
+	 * completes (presumably xfs_hook_del() waits for running callbacks).
+	 *
+	 * NOTE(review): on the setup failure path the hook may never have
+	 * been added; confirm that xfs_hook_del() tolerates that.
+	 */
+	xfs_hook_del(&xnc->sc->mp->m_nlink_mod_hooks, &xnc->mod_hook);
+
+	/* xfbma_init() may have failed, leaving nothing to destroy. */
+	if (xnc->nlinks) {
+		xfbma_destroy(xnc->nlinks);
+		xnc->nlinks = NULL;
+	}
+
+	mutex_destroy(&xnc->lock);
+	xnc->sc = NULL;
+}
+
+/*
+ * Prepare the state needed to scan all inodes in the entire filesystem and
+ * generate link count data: the lock, the shadow link count array, and the
+ * hook that captures live link count updates. If setup succeeds, teardown
+ * is deferred to sc->buf_cleanup so that the counts are left alive for a
+ * repair. If any error occurs, we'll tear everything down.
+ *
+ * Returns 0 on success, -ENOMEM if the shadow array cannot be created, or
+ * the error from xfs_hook_add().
+ *
+ * NOTE(review): out_teardown runs xchk_nlinks_teardown_scan() even when
+ * xfbma_init() or xfs_hook_add() failed, so teardown can see a NULL nlinks
+ * array and/or a hook that was never added -- confirm that is handled.
+ */
+STATIC int
+xchk_nlinks_setup_scan(
+ struct xfs_scrub *sc,
+ struct xchk_nlinks *xnc)
+{
+ int error;
+
+ ASSERT(xnc->sc == NULL);
+ xnc->sc = sc;
+
+ xnc->hook_dead = false;
+ mutex_init(&xnc->lock);
+
+ error = -ENOMEM; /* reported if the array allocation below fails */
+ xnc->nlinks = xfbma_init("link counts", sizeof(xfs_nlink_t));
+ if (!xnc->nlinks)
+ goto out_teardown;
+
+ /*
+ * Hook into the bumplink/droplink code. The hook only triggers for
+ * inodes that were already scanned, and the scanner thread takes each
+ * inode's ILOCK, which means that any in-progress inode updates will
+ * finish before we can scan the inode.
+ */
+ error = xfs_hook_add(&sc->mp->m_nlink_mod_hooks, &xnc->mod_hook,
+ xchk_nlinks_mod_inode);
+ if (error)
+ goto out_teardown;
+
+ /*
+ * Use deferred cleanup to pass the inode link count data to repair.
+ *
+ * NOTE(review): the teardown function takes a struct xchk_nlinks *
+ * but is stored as void (*)(void *); calling a function through a
+ * pointer of an incompatible type is undefined in ISO C -- confirm
+ * this cast is acceptable here.
+ */
+ sc->buf_cleanup = (void (*)(void *))xchk_nlinks_teardown_scan;
+ return 0;
+
+out_teardown:
+ xchk_nlinks_teardown_scan(xnc);
+ return error;
+}
+
+/*
+ * Scrub the link count of all inodes on the filesystem.
+ *
+ * Runs in three stages: set up the live scan state, walk every inode to
+ * collect shadow link counts, then compare those counts with the inodes.
+ * After the collect and compare stages the result is passed through
+ * xchk_xref_process_error(); if that returns false, the error value it leaves
+ * behind is returned.  Returns 0 otherwise.
+ */
+int
+xchk_nlinks(
+	struct xfs_scrub	*sc)
+{
+	struct xchk_nlinks	*xnc = sc->buf;
+	int			error;
+
+	/* Check link counts on the live filesystem. */
+	error = xchk_nlinks_setup_scan(sc, xnc);
+	if (error)
+		return error;
+
+	/* Walk all inodes, picking up link count information. */
+	error = xchk_nlinks_collect(xnc);
+	if (!xchk_xref_process_error(sc, 0, 0, &error))
+		return error;
+
+	/* Compare link counts. */
+	error = xchk_nlinks_compare_counts(xnc);
+	return xchk_xref_process_error(sc, 0, 0, &error) ? 0 : error;
+}
diff --git a/fs/xfs/scrub/nlinks.h b/fs/xfs/scrub/nlinks.h
new file mode 100644
index 000000000000..b2b57e9c2965
--- /dev/null
+++ b/fs/xfs/scrub/nlinks.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_SCRUB_NLINKS_H__
+#define __XFS_SCRUB_NLINKS_H__
+
+/*
+ * Live link count control structure.
+ *
+ * One of these is allocated by xchk_setup_nlinks() and hung off sc->buf for
+ * the duration of a link count scrub.
+ */
+struct xchk_nlinks {
+ struct xfs_scrub *sc; /* owning scrub context; NULL when no scan is set up */
+
+ /* Shadow link count data: a sparse array of xfs_nlink_t indexed by inode number. */
+ struct xfbma *nlinks;
+
+ /*
+ * Scan cursor. xchk_nlinks_advance() sets this to one less than the
+ * next inode it found; the mod hook ignores updates whose directory
+ * inode number is greater than this value.
+ */
+ xfs_ino_t last_ino;
+
+ /* Hooks into bumplink/droplink code. */
+ struct notifier_block mod_hook;
+
+ /* Lock for the data used to capture live nlink updates. */
+ struct mutex lock;
+
+ /*
+ * Something failed during live tracking. Once set, the hook stops
+ * recording updates and the scan reports itself as incomplete.
+ */
+ bool hook_dead;
+};
+
+int xchk_nlinks_get_shadow_count(struct xchk_nlinks *xnc, xfs_ino_t ino,
+ xfs_nlink_t *nlinks);
+
+#endif /* __XFS_SCRUB_NLINKS_H__ */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index e1a2367e9042..1d51a6002ede 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -405,6 +405,12 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.has = xfs_sb_version_hasrtreflink,
.repair = xrep_rtrefcountbt,
},
+ [XFS_SCRUB_TYPE_NLINKS] = { /* inode link counts */
+ .type = ST_FS,
+ .setup = xchk_setup_nlinks,
+ .scrub = xchk_nlinks,
+ .repair = xrep_notsupported,
+ },
};
/* This isn't a stable feature, warn once per day. */
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 314d52fcf0fc..ebd6c6ee4dbc 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -191,6 +191,7 @@ xchk_quotacheck(struct xfs_scrub *sc)
}
#endif
int xchk_fscounters(struct xfs_scrub *sc);
+int xchk_nlinks(struct xfs_scrub *sc);
/* cross-referencing helpers */
void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno,
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 74137dbb78fe..bcfdcb4ccc16 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -878,6 +878,33 @@ TRACE_EVENT(xchk_rtsum_record_free,
__entry->v)
)
+DECLARE_EVENT_CLASS(xchk_nlink_diff_class, /* event class: inode link count vs. the count scrub observed */
+ TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_nlink_t ondisk,
+ xfs_nlink_t saw),
+ TP_ARGS(mp, ino, ondisk, saw),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_nlink_t, ondisk) /* link count taken from the inode by the caller */
+ __field(xfs_nlink_t, saw) /* link count from the scrub shadow array */
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->ino = ino;
+ __entry->ondisk = ondisk;
+ __entry->saw = saw;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx nlink %u saw_nlink %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino, __entry->ondisk, __entry->saw)
+);
+#define DEFINE_SCRUB_NLINK_DIFF_EVENT(name) \
+DEFINE_EVENT(xchk_nlink_diff_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_nlink_t ondisk, \
+ xfs_nlink_t saw), \
+ TP_ARGS(mp, ino, ondisk, saw))
+DEFINE_SCRUB_NLINK_DIFF_EVENT(xchk_nlinks_compare_inode); /* emitted by xchk_nlinks_compare_inode() when the counts differ */
+
/* repair tracepoints */
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 92f136917047..e2ada2e86abf 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -765,6 +765,16 @@ xfs_create(
goto out_trans_cancel;
/*
+ * Create ip with a reference from dp, and add '.' and '..' references
+ * if it's a directory.
+ */
+ xfs_inode_nlink_delta(dp, ip, 1);
+ if (is_dir) {
+ xfs_inode_nlink_delta(ip, ip, 1);
+ xfs_inode_nlink_delta(ip, dp, 1);
+ }
+
+ /*
* If this is a synchronous mount, make sure that the
* create transaction goes to disk before returning to
* the user.
@@ -1056,6 +1066,7 @@ xfs_link(
error = xfs_dir_link_existing_child(tp, resblks, tdp, target_name, sip);
if (error)
goto error_return;
+ xfs_inode_nlink_delta(tdp, sip, 1);
/*
* If this is a synchronous mount, make sure that the
@@ -2176,6 +2187,16 @@ xfs_remove(
goto out_trans_cancel;
/*
+ * Drop the link from dp to ip, and if ip was a directory, remove the
+ * '.' and '..' references since we freed the directory.
+ */
+ xfs_inode_nlink_delta(dp, ip, -1);
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
+ xfs_inode_nlink_delta(ip, dp, -1);
+ xfs_inode_nlink_delta(ip, ip, -1);
+ }
+
+ /*
* If this is a synchronous mount, make sure that the
* remove transaction goes to disk before returning to
* the user.
@@ -2303,6 +2324,67 @@ xfs_rename_alloc_whiteout(
return 0;
}
+/*
+ * Tell the nlink hooks about every directory entry reference that a rename
+ * adds or removes.
+ *
+ * Each xfs_inode_nlink_delta(dir, child, delta) call reports that the number
+ * of references from @dir to @child changed by @delta.  @wip is the whiteout
+ * inode, if one was created; @target_ip is the inode that already occupied
+ * the target name, if any; @flags are the rename flags.
+ */
+static inline void
+xfs_rename_call_nlink_hooks(
+	struct xfs_inode	*src_dp,
+	struct xfs_inode	*src_ip,
+	struct xfs_inode	*target_dp,
+	struct xfs_inode	*target_ip,
+	struct xfs_inode	*wip,
+	unsigned int		flags)
+{
+	bool			target_is_dir;
+
+	/* If we added a whiteout, add the reference from src_dp. */
+	if (wip)
+		xfs_inode_nlink_delta(src_dp, wip, 1);
+
+	/* Move the src_ip reference from src_dp to target_dp. */
+	xfs_inode_nlink_delta(src_dp, src_ip, -1);
+	xfs_inode_nlink_delta(target_dp, src_ip, 1);
+
+	/* A directory also moves its '..' reference to the new parent. */
+	if (S_ISDIR(VFS_I(src_ip)->i_mode)) {
+		xfs_inode_nlink_delta(src_ip, src_dp, -1);
+		xfs_inode_nlink_delta(src_ip, target_dp, 1);
+	}
+
+	/* Nothing lived at the target name, so we're done. */
+	if (!target_ip)
+		return;
+
+	target_is_dir = S_ISDIR(VFS_I(target_ip)->i_mode);
+
+	/* Either way, target_dp loses its reference to target_ip. */
+	xfs_inode_nlink_delta(target_dp, target_ip, -1);
+
+	if (flags & RENAME_EXCHANGE) {
+		/* An exchange gives target_ip a new home in src_dp. */
+		xfs_inode_nlink_delta(src_dp, target_ip, 1);
+
+		/* ...and moves its '..' reference there if it's a dir. */
+		if (target_is_dir) {
+			xfs_inode_nlink_delta(target_ip, target_dp, -1);
+			xfs_inode_nlink_delta(target_ip, src_dp, 1);
+		}
+	} else if (target_is_dir) {
+		/*
+		 * target_ip was a dir that got replaced, so drop its '.' and
+		 * '..' references since that was the last reference.
+		 */
+		ASSERT(VFS_I(target_ip)->i_nlink == 0);
+		xfs_inode_nlink_delta(target_ip, target_dp, -1);
+		xfs_inode_nlink_delta(target_ip, target_ip, -1);
+	}
+}
+
/*
* xfs_rename
*/
@@ -2443,6 +2525,9 @@ xfs_rename(
VFS_I(wip)->i_state &= ~I_LINKABLE;
}
+ xfs_rename_call_nlink_hooks(src_dp, src_ip, target_dp, target_ip, wip,
+ flags);
+
error = xfs_finish_rename(tp);
if (wip)
xfs_irele(wip);
@@ -2905,3 +2990,22 @@ xfs_is_always_cow_inode(
return ip->i_mount->m_always_cow &&
xfs_sb_version_hasreflink(&ip->i_mount->m_sb);
}
+
+/*
+ * Call a hook to capture nlink updates in real time.
+ *
+ * Reports to the mount's nlink hook chain that the number of references from
+ * directory @dp to inode @ip changed by @delta.  Only built when online scrub
+ * is enabled.
+ */
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB)
+void
+xfs_inode_nlink_delta(
+	struct xfs_inode		*dp,
+	struct xfs_inode		*ip,
+	int32_t				delta)
+{
+	struct xfs_nlink_mod_params	params = {
+		.dir			= dp->i_ino,
+		.ino			= ip->i_ino,
+		.delta			= delta,
+	};
+
+	xfs_hook_call(&ip->i_mount->m_nlink_mod_hooks, 0, &params);
+}
+#endif
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 060cfb591515..fc611bd8e9a1 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -508,4 +508,11 @@ int xfs_icreate_dqalloc(const struct xfs_ialloc_args *args,
struct xfs_dquot **udqpp, struct xfs_dquot **gdqpp,
struct xfs_dquot **pdqpp);
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB)
+void xfs_inode_nlink_delta(struct xfs_inode *dp, struct xfs_inode *ip,
+ int32_t delta);
+#else
+# define xfs_inode_nlink_delta(dp, ip, delta) ((void)0)
+#endif
+
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9ec60413fd05..73b0c39782b2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -254,8 +254,20 @@ typedef struct xfs_mount {
/* Blocks reserved for all kinds of inode-based (rt) metadata. */
struct xfs_ag_resv m_rtmeta_resv;
+
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB)
+ /* online nlink check stuff */
+ struct xfs_hook_chain m_nlink_mod_hooks;
+#endif
} xfs_mount_t;
+/*
+ * Parameters for xfs_bumplink/droplink hook.
+ *
+ * Filled in by xfs_inode_nlink_delta() and handed to every function on the
+ * mount's m_nlink_mod_hooks chain.
+ */
+struct xfs_nlink_mod_params {
+ xfs_ino_t dir; /* inode number of the directory holding the reference */
+ xfs_ino_t ino; /* inode number of the inode being referenced */
+ int32_t delta; /* change in the number of references from dir to ino */
+};
+
#define M_IGEO(mp) (&(mp)->m_ino_geo)
/*
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index efad38847230..0ac13183524f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1976,6 +1976,10 @@ static int xfs_init_fs_context(
mp->m_logbsize = -1;
mp->m_allocsize_log = 16; /* 64k */
+#if IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB)
+ xfs_hook_init(&mp->m_nlink_mod_hooks);
+#endif
+
/*
* Copy binary VFS mount flags we are interested in.
*/
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index a8fff7f5e428..fa7f146533a3 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -331,6 +331,7 @@ xfs_symlink(
goto out_trans_cancel;
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+ xfs_inode_nlink_delta(dp, ip, 1);
/*
* If this is a synchronous mount, make sure that the