summaryrefslogtreecommitdiff
path: root/fs/xfs/scrub
diff options
context:
space:
mode:
author: Darrick J. Wong <djwong@kernel.org> 2021-09-01 10:59:07 -0700
committer: Darrick J. Wong <djwong@kernel.org> 2021-10-22 16:40:46 -0700
commit: 6773c22cee21e0a56a6d2e2d0c305999191e019f (patch)
tree: cf5c7b9114bc3780dee751c02557fc7d5efa697d /fs/xfs/scrub
parent: 053acce8b26cba39af566e54cd61eb6d2ce198da (diff)
xfs: online repair of parent pointers
Teach the online repair code to fix directory '..' entries (aka directory parent pointers). Since this requires us to know how to scan every dirent in every directory on the filesystem, we can reuse the parent scanner components to validate (or find!) the correct parent entry when rebuilding directories too. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs/xfs/scrub')
-rw-r--r-- fs/xfs/scrub/dir_repair.c 25
-rw-r--r-- fs/xfs/scrub/parent.c 49
-rw-r--r-- fs/xfs/scrub/parent.h 14
-rw-r--r-- fs/xfs/scrub/parent_repair.c 398
-rw-r--r-- fs/xfs/scrub/repair.h 2
-rw-r--r-- fs/xfs/scrub/scrub.c 2
-rw-r--r-- fs/xfs/scrub/trace.h 3
7 files changed, 470 insertions, 23 deletions
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index ade9b8399cf2..c7e3968abd9d 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -35,6 +35,7 @@
#include "scrub/tempfile.h"
#include "scrub/xfarray.h"
#include "scrub/xfblob.h"
+#include "scrub/parent.h"
/*
* Directory Repair
@@ -1101,6 +1102,7 @@ xrep_directory_find_parent(
{
struct xfs_scrub *sc = rd->sc;
xfs_ino_t parent_ino;
+ int error;
/*
* If we're the root directory, we are our own parent. If we're an
@@ -1114,16 +1116,25 @@ xrep_directory_find_parent(
}
/*
- * Try to look up '..'; if it seems plausible, go with it. This will
- * be augmented later.
+ * Try to look up '..'; if it seems plausible, go with it. Check that
+ * the parent directory actually points to this directory. If so, we
+ * are good to go. Any errors just push us to scanning the fs.
*/
parent_ino = xrep_dotdot_lookup(sc);
- if (parent_ino != NULLFSINO) {
- rd->parent_ino = parent_ino;
- return 0;
- }
+ error = xrep_parent_confirm(sc, &parent_ino);
+ if (!error && parent_ino != NULLFSINO)
+ goto foundit;
+
+ /* Otherwise, scan the entire filesystem. */
+ error = xrep_parent_scan(sc, &parent_ino);
+ if (error)
+ return error;
+ if (parent_ino == NULLFSINO)
+ return -EFSCORRUPTED;
- return -EFSCORRUPTED;
+foundit:
+ rd->parent_ino = parent_ino;
+ return 0;
}
/*
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 9dbfa4585167..5e11d5141d19 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -16,6 +16,7 @@
#include "xfs_dir2_priv.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
+#include "scrub/parent.h"
/* Set us up to scrub parents. */
int
@@ -121,6 +122,36 @@ out:
}
/*
+ * Try to iolock the parent dir @dp in shared mode and the child dir @sc->ip
+ * exclusively.
+ *
+ * The locks recorded in @sc->ilock_flags are released first. After that we
+ * loop: take the shared IOLOCK on @dp with xfs_ilock(), then only trylock
+ * the child with xchk_ilock_nowait(). If the trylock fails, @dp is unlocked
+ * again and the cycle restarts.
+ *
+ * Returns 0 once both lock calls have succeeded in the same iteration.
+ * Returns -EDEADLOCK if @dp is the inode being scrubbed. If
+ * xchk_should_terminate() fires, returns the error it stored; on that path
+ * this function is not holding the IOLOCK on @dp and has not re-taken the
+ * locks that it released at the start.
+ */
+int
+xchk_parent_lock_two_dirs(
+ struct xfs_scrub *sc,
+ struct xfs_inode *dp)
+{
+ int error = 0;
+
+ /* Callers shouldn't do this, but protect ourselves anyway. */
+ if (dp == sc->ip) {
+ ASSERT(dp != sc->ip);
+ return -EDEADLOCK;
+ }
+
+ xchk_iunlock(sc, sc->ilock_flags);
+ while (true) {
+ if (xchk_should_terminate(sc, &error))
+ return error;
+ xfs_ilock(dp, XFS_IOLOCK_SHARED);
+ if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
+ break;
+ xfs_iunlock(dp, XFS_IOLOCK_SHARED);
+ }
+
+ return 0;
+}
+
+/*
* Given the inode number of the alleged parent of the inode being
* scrubbed, try to validate that the parent has exactly one directory
* entry pointing back to the inode being scrubbed.
@@ -185,21 +216,9 @@ xchk_parent_validate(
* try to lock the alleged parent and trylock the child.
*/
if (!xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
- xchk_iunlock(sc, sc->ilock_flags);
- while (true) {
- if (xchk_should_terminate(sc, &error))
- goto out_rele;
- xfs_ilock(dp, XFS_IOLOCK_SHARED);
- if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
- break;
- xfs_iunlock(dp, XFS_IOLOCK_SHARED);
- }
-
- /*
- * Now that we've locked out updates to the child directory,
- * re-sample the expected nlink and the '..' dirent.
- */
- expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;
+ error = xchk_parent_lock_two_dirs(sc, dp);
+ if (error)
+ goto out_rele;
error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot,
&parent_ino, NULL);
diff --git a/fs/xfs/scrub/parent.h b/fs/xfs/scrub/parent.h
new file mode 100644
index 000000000000..10612f204d41
--- /dev/null
+++ b/fs/xfs/scrub/parent.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_SCRUB_PARENT_H__
+#define __XFS_SCRUB_PARENT_H__
+
+int xchk_parent_lock_two_dirs(struct xfs_scrub *sc, struct xfs_inode *dp);
+
+int xrep_parent_confirm(struct xfs_scrub *sc, xfs_ino_t *parent_ino);
+int xrep_parent_scan(struct xfs_scrub *sc, xfs_ino_t *parent_ino);
+
+#endif /* __XFS_SCRUB_PARENT_H__ */
diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c
new file mode 100644
index 000000000000..3eb2fcb6ee40
--- /dev/null
+++ b/fs/xfs/scrub/parent_repair.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_trans_space.h"
+#include "xfs_health.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/iscan.h"
+#include "scrub/parent.h"
+
+struct xrep_findparent_info {
+ /*
+ * The directory currently being scanned, and a readdir context. The
+ * dirent callback is handed only @dc and recovers this structure from
+ * it with container_of().
+ */
+ struct dir_context dc;
+ struct xfs_inode *dp;
+
+ /*
+ * Scrub context. We're looking for a @dp containing a directory
+ * entry pointing to sc->ip->i_ino.
+ */
+ struct xfs_scrub *sc;
+
+ /*
+ * Parent that we've found for sc->ip. If we're scanning the entire
+ * directory tree, we need this to ensure that we only find /one/
+ * parent directory. NULLFSINO means that nothing has been found yet.
+ */
+ xfs_ino_t found_parent;
+
+ /*
+ * Errors encountered during scanning. Note that the vfs readdir
+ * functions squash the nonzero codes that we return here into a
+ * "short" directory read, so the actual error codes are tracked and
+ * returned separately for simplicity.
+ */
+ int scan_error;
+};
+
+/*
+ * If this directory entry points to the scrub target inode, then the directory
+ * we're scanning is the parent of the scrub target inode.
+ *
+ * This is the dir_context actor installed by the callers in this file. The
+ * entry is described by @name/@namelen and @ino; @pos and @type are not
+ * examined here.
+ *
+ * Returns 0 to keep iterating, or 1 to stop the walk. Whenever 1 is
+ * returned the reason is left in fpi->scan_error (termination requested, a
+ * zero-length name, or a candidate parent was already recorded).
+ */
+STATIC int
+xrep_findparent_dirent(
+ struct dir_context *dc,
+ const char *name,
+ int namelen,
+ loff_t pos,
+ u64 ino,
+ unsigned type)
+{
+ struct xrep_findparent_info *fpi;
+
+ fpi = container_of(dc, struct xrep_findparent_info, dc);
+
+ if (xchk_should_terminate(fpi->sc, &fpi->scan_error))
+ return 1;
+
+ if (ino != fpi->sc->ip->i_ino)
+ return 0;
+
+ /* Should never happen, but we want to bail out regardless. */
+ if (namelen == 0) {
+ fpi->scan_error = -EFSCORRUPTED;
+ return 1;
+ }
+
+ /*
+ * Ignore dotdot and dot entries -- we're looking for parent -> child
+ * links only.
+ */
+ if (name[0] == '.' && (namelen == 1 ||
+ (namelen == 2 && name[1] == '.')))
+ return 0;
+
+ /*
+ * Uhoh, more than one parent for a dir? Any second matching entry
+ * trips this, including one in the directory that was already
+ * recorded.
+ */
+ if (fpi->found_parent != NULLFSINO) {
+ trace_xrep_findparent_dirent(fpi->sc->ip, 0);
+ fpi->scan_error = -EFSCORRUPTED;
+ return 1;
+ }
+
+ /* We found a potential parent; remember this. */
+ trace_xrep_findparent_dirent(fpi->sc->ip, fpi->dp->i_ino);
+ fpi->found_parent = fpi->dp->i_ino;
+ return 0;
+}
+
+/*
+ * If this is a directory, walk the dirents looking for any that point to the
+ * scrub target inode.
+ *
+ * The caller supplies the directory in fpi->dp. Matches are reported through
+ * fpi->found_parent by the dirent actor. Any shared IOLOCK taken on fpi->dp
+ * here is dropped again before returning.
+ *
+ * Returns 0 if the walk finished, or if fpi->dp is the scrub target itself.
+ * Returns -ECANCELED if the locks had to be cycled and the child's '..' entry
+ * changed or the child was unlinked in the meantime; fpi->found_parent then
+ * holds the freshly looked-up '..' value.
+ * Returns -EFSCORRUPTED if fpi->dp is marked sick.
+ * Returns any other negative errno produced by locking, readahead, readdir,
+ * or the dirent actor.
+ */
+STATIC int
+xrep_findparent_walk_directory(
+ struct xrep_findparent_info *fpi)
+{
+ struct xfs_scrub *sc = fpi->sc;
+ struct xfs_inode *dp = fpi->dp;
+ loff_t oldpos;
+ size_t bufsize;
+ unsigned int lock_mode;
+ int error = 0;
+
+ /* We can't point to ourselves. */
+ if (dp == sc->ip)
+ return 0;
+
+ /*
+ * If we can take the parent's lock then we're ready to scan.
+ *
+ * If we can't, release the lock on the child, and then try to lock the
+ * alleged parent and child at the same time. Use trylock for the
+ * second lock so that we don't ABBA deadlock the system.
+ */
+ if (!xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
+ xfs_ino_t orig_parent, new_parent;
+
+ orig_parent = xrep_dotdot_lookup(sc);
+
+ error = xchk_parent_lock_two_dirs(sc, dp);
+ if (error)
+ return error;
+
+ /*
+ * If the parent changed or the child was unlinked while the
+ * child directory was unlocked, we don't need to do anything
+ * further. Hand the new '..' value back through found_parent.
+ */
+ new_parent = xrep_dotdot_lookup(sc);
+ if (orig_parent != new_parent || VFS_I(sc->ip)->i_nlink == 0) {
+ fpi->found_parent = new_parent;
+ error = -ECANCELED;
+ goto out_unlock;
+ }
+ }
+
+ /*
+ * If this directory is known to be sick, we cannot scan it reliably
+ * and must abort.
+ */
+ if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
+ XFS_SICK_INO_BMBTD |
+ XFS_SICK_INO_DIR)) {
+ error = -EFSCORRUPTED;
+ goto out_unlock;
+ }
+
+ /*
+ * If there are any blocks, read-ahead block 0 as we're almost certain
+ * to have the next operation be a read there. This is how we
+ * guarantee that the directory's extent map has been loaded, if there
+ * is one.
+ */
+ lock_mode = xfs_ilock_data_map_shared(dp);
+ if (dp->i_df.if_nextents > 0)
+ error = xfs_dir3_data_readahead(dp, 0, 0);
+ xfs_iunlock(dp, lock_mode);
+ if (error)
+ goto out_unlock;
+
+ /*
+ * Scan the directory to see if it contains an entry pointing to
+ * the directory that we are repairing. Keep calling xfs_readdir()
+ * until it fails, the actor records a scan error, or a pass leaves
+ * dc.pos where it was.
+ */
+ bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, dp->i_disk_size);
+ fpi->dc.pos = 0;
+ oldpos = 0;
+ while (true) {
+ error = xfs_readdir(sc->tp, dp, &fpi->dc, bufsize);
+ if (error)
+ break;
+ if (fpi->scan_error) {
+ error = fpi->scan_error;
+ break;
+ }
+ if (oldpos == fpi->dc.pos)
+ break;
+ oldpos = fpi->dc.pos;
+ }
+
+out_unlock:
+ xfs_iunlock(dp, XFS_IOLOCK_SHARED);
+ return error;
+}
+
+/*
+ * Confirm that the directory @parent_ino actually contains a directory entry
+ * pointing to the child @sc->ip->i_ino. This function returns one of several
+ * ways:
+ *
+ * Returns 0 with @parent_ino unchanged if the parent was confirmed.
+ * Returns 0 with a different @parent_ino if we had to cycle inode locks to
+ * walk the alleged parent and the child's '..' entry was changed in the mean
+ * time.
+ * Returns 0 with @parent_ino set to the root directory's inode number if
+ * @sc->ip is the root directory or has a link count of zero.
+ * Returns 0 with @parent_ino set to NULLFSINO if the parent was not valid.
+ * Returns the usual negative errno if something else happened; @parent_ino
+ * is not updated from the walk in that case.
+ */
+int
+xrep_parent_confirm(
+ struct xfs_scrub *sc,
+ xfs_ino_t *parent_ino)
+{
+ struct xrep_findparent_info fpi = {
+ .sc = sc,
+ .dc.actor = xrep_findparent_dirent,
+ .found_parent = NULLFSINO,
+ };
+ int error;
+
+ /*
+ * The root directory always points to itself. Unlinked dirs can point
+ * anywhere, so we point them at the root dir too.
+ */
+ if (sc->ip == sc->mp->m_rootip || VFS_I(sc->ip)->i_nlink == 0) {
+ *parent_ino = sc->mp->m_sb.sb_rootino;
+ return 0;
+ }
+
+ /* Reject garbage parent inode numbers and self-referential parents. */
+ if (*parent_ino == NULLFSINO)
+ return 0;
+ if (!xfs_verify_dir_ino(sc->mp, *parent_ino) ||
+ *parent_ino == sc->ip->i_ino) {
+ *parent_ino = NULLFSINO;
+ return 0;
+ }
+
+ error = xfs_iget(sc->mp, sc->tp, *parent_ino, XFS_IGET_UNTRUSTED, 0,
+ &fpi.dp);
+ if (error)
+ return error;
+
+ if (!S_ISDIR(VFS_I(fpi.dp)->i_mode)) {
+ *parent_ino = NULLFSINO;
+ goto out_rele;
+ }
+
+ error = xrep_findparent_walk_directory(&fpi);
+ if (error == -ECANCELED)
+ error = 0;
+ if (error)
+ goto out_rele;
+
+ *parent_ino = fpi.found_parent;
+out_rele:
+ xfs_irele(fpi.dp);
+ return error;
+}
+
+/*
+ * Scan the entire filesystem looking for a parent inode.
+ *
+ * Each inode that the inode scanner hands back is marked visited and
+ * released; directories are additionally walked with
+ * xrep_findparent_walk_directory(). An -EAGAIN from xchk_iscan_iget() just
+ * moves on to the next xchk_iscan_advance() call. A walk that ends with
+ * -ECANCELED stops the scan and is reported to the caller as success.
+ *
+ * Returns 0 with @parent_ino set to the parent that we found, or the current
+ * value of the child's '..' entry, if it changed when we had to drop the
+ * child's IOLOCK.
+ * Returns 0 with @parent_ino set to NULLFSINO if we didn't find anything.
+ * Returns the usual negative errno if something else happened; @parent_ino
+ * is left untouched in that case.
+ */
+int
+xrep_parent_scan(
+ struct xfs_scrub *sc,
+ xfs_ino_t *parent_ino)
+{
+ struct xrep_findparent_info fpi = {
+ .sc = sc,
+ .dc.actor = xrep_findparent_dirent,
+ .found_parent = NULLFSINO,
+ };
+ struct xchk_iscan iscan = {
+ .iget_tries = 20,
+ .iget_retry_delay = HZ / 10,
+ };
+ int ret;
+
+ xchk_iscan_start(&iscan);
+ while ((ret = xchk_iscan_advance(sc, &iscan)) == 1) {
+ ret = xchk_iscan_iget(sc, &iscan, &fpi.dp);
+ if (ret == -EAGAIN)
+ continue;
+ if (ret)
+ break;
+
+ if (S_ISDIR(VFS_I(fpi.dp)->i_mode))
+ ret = xrep_findparent_walk_directory(&fpi);
+ xchk_iscan_mark_visited(&iscan, fpi.dp);
+ xfs_irele(fpi.dp);
+ if (ret)
+ break;
+
+ if (xchk_should_terminate(sc, &ret))
+ break;
+ }
+ xchk_iscan_finish(&iscan);
+
+ if (ret == -ECANCELED)
+ ret = 0;
+ if (ret)
+ return ret;
+
+ *parent_ino = fpi.found_parent;
+ return 0;
+}
+
+/*
+ * Repairing The Directory Parent Pointer
+ * ======================================
+ *
+ * Currently, only directories support parent pointers (in the form of '..'
+ * entries), so we simply scan the filesystem and update the '..' entry.
+ *
+ * Note that because the only parent pointer is the dotdot entry, we won't
+ * touch an unhealthy directory, since the directory repair code is perfectly
+ * capable of rebuilding a directory with the proper parent inode.
+ */
+
+/*
+ * Replace a directory's parent '..' pointer.
+ *
+ * Points the '..' entry of @sc->ip at @parent_ino inside the scrub
+ * transaction @sc->tp. Returns 0 or a negative errno from either the extra
+ * space reservation or xfs_dir_replace().
+ */
+STATIC int
+xrep_parent_reset_dir(
+ struct xfs_scrub *sc,
+ xfs_ino_t parent_ino)
+{
+ unsigned int spaceres;
+ int error;
+
+ trace_xrep_parent_reset_dir(sc->ip, parent_ino);
+
+ /* Reserve more space just in case we have to expand the dir. */
+ spaceres = XFS_RENAME_SPACE_RES(sc->mp, 2);
+ error = xfs_trans_reserve_more_inode(sc->tp, sc->ip, spaceres, 0);
+ if (error)
+ return error;
+
+ /* Replace the dotdot entry. */
+ return xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot, parent_ino,
+ spaceres);
+}
+
+int
+xrep_parent(
+ struct xfs_scrub *sc)
+{
+ xfs_ino_t parent_ino, curr_parent;
+ unsigned int sick, checked;
+ int error;
+
+ /*
+ * Repair entry point for the '..' entry of @sc->ip: find the parent
+ * with a full filesystem scan and rewrite '..' only if it differs.
+ * Returns 0 or a negative errno.
+ *
+ * Avoid sick directories. The parent pointer scrubber dropped the
+ * ILOCK and MMAPLOCK, but we still hold IOLOCK_EXCL on the directory.
+ * There shouldn't be anyone else clearing the directory's sick status.
+ * Only the XFS_SICK_INO_DIR bit of @sick is tested here.
+ */
+ xfs_inode_measure_sickness(sc->ip, &sick, &checked);
+ if (sick & XFS_SICK_INO_DIR)
+ return -EFSCORRUPTED;
+
+ /*
+ * Scan the entire filesystem for a parent. Finding no parent at all
+ * is reported as corruption.
+ */
+ error = xrep_parent_scan(sc, &parent_ino);
+ if (error)
+ return error;
+ if (parent_ino == NULLFSINO)
+ return -EFSCORRUPTED;
+
+ /* If the '..' entry is already set to the parent inode, we're done. */
+ curr_parent = xrep_dotdot_lookup(sc);
+ if (curr_parent != NULLFSINO && curr_parent == parent_ino)
+ return 0;
+
+ /*
+ * Re-take the ILOCK, we're going to need it to modify the dir. The
+ * directory is also joined to the scrub transaction.
+ */
+ xchk_ilock(sc, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+
+ error = xrep_ino_dqattach(sc);
+ if (error)
+ return error;
+
+ return xrep_parent_reset_dir(sc, parent_ino);
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 30eab6397941..f9dd6a036998 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -124,6 +124,7 @@ int xrep_symlink(struct xfs_scrub *sc);
int xrep_fscounters(struct xfs_scrub *sc);
int xrep_xattr(struct xfs_scrub *sc);
int xrep_directory(struct xfs_scrub *sc);
+int xrep_parent(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_QUOTA
int xrep_quota(struct xfs_scrub *sc);
@@ -282,6 +283,7 @@ xrep_setup_xattr(
#define xrep_rtsummary xrep_notsupported
#define xrep_xattr xrep_notsupported
#define xrep_directory xrep_notsupported
+#define xrep_parent xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 2ecd1ac20286..14c76a55a9bb 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -321,7 +321,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.type = ST_INODE,
.setup = xchk_setup_parent,
.scrub = xchk_parent,
- .repair = xrep_notsupported,
+ .repair = xrep_parent,
},
[XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */
.type = ST_FS,
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 98f6fe49e836..4e293a8c7724 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1683,6 +1683,7 @@ DEFINE_EVENT(xrep_directory_class, name, \
TP_ARGS(dp, parent_ino))
DEFINE_XREP_DIR_CLASS(xrep_directory_rebuild_tree);
DEFINE_XREP_DIR_CLASS(xrep_directory_reset_fork);
+DEFINE_XREP_DIR_CLASS(xrep_parent_reset_dir);
DECLARE_EVENT_CLASS(xrep_dirent_class,
TP_PROTO(struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t ino),
@@ -1740,6 +1741,8 @@ DEFINE_EVENT(xrep_parent_salvage_class, name, \
TP_PROTO(struct xfs_inode *dp, xfs_ino_t ino), \
TP_ARGS(dp, ino))
DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_directory_salvaged_parent);
+DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_dir_salvaged_parent);
+DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_findparent_dirent);
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */