summaryrefslogtreecommitdiff
path: root/fs/xfs/scrub/parent_repair.c
diff options
context:
space:
mode:
author: Darrick J. Wong <djwong@kernel.org> 2021-09-01 10:59:07 -0700
committer: Darrick J. Wong <djwong@kernel.org> 2021-10-22 16:40:46 -0700
commit: 6773c22cee21e0a56a6d2e2d0c305999191e019f (patch)
tree: cf5c7b9114bc3780dee751c02557fc7d5efa697d /fs/xfs/scrub/parent_repair.c
parent: 053acce8b26cba39af566e54cd61eb6d2ce198da (diff)
xfs: online repair of parent pointers
Teach the online repair code to fix directory '..' entries (aka directory parent pointers). Since this requires us to know how to scan every dirent in every directory on the filesystem, we can reuse the parent scanner components to validate (or find!) the correct parent entry when rebuilding directories too. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs/xfs/scrub/parent_repair.c')
-rw-r--r-- fs/xfs/scrub/parent_repair.c 398
1 files changed, 398 insertions, 0 deletions
diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c
new file mode 100644
index 000000000000..3eb2fcb6ee40
--- /dev/null
+++ b/fs/xfs/scrub/parent_repair.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_trans_space.h"
+#include "xfs_health.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/iscan.h"
+#include "scrub/parent.h"
+/*
+ * Context for walking one directory while looking for directory entries
+ * that point at the inode being repaired (sc->ip).
+ */
+struct xrep_findparent_info {
+ /*
+ * The directory currently being scanned, and a readdir context. The
+ * readdir actor recovers this structure from @dc via container_of().
+ */
+ struct dir_context dc;
+ struct xfs_inode *dp;
+
+ /*
+ * Scrub context. We're looking for a @dp containing a directory
+ * entry pointing to sc->ip->i_ino.
+ */
+ struct xfs_scrub *sc;
+
+ /*
+ * Parent that we've found for sc->ip. If we're scanning the entire
+ * directory tree, we need this to ensure that we only find /one/
+ * parent directory. NULLFSINO means that no parent has been found
+ * yet.
+ */
+ xfs_ino_t found_parent;
+
+ /*
+ * Errors encountered during scanning. Note that the vfs readdir
+ * functions squash the nonzero codes that we return here into a
+ * "short" directory read, so the actual error codes are tracked and
+ * returned separately for simplicity.
+ */
+ int scan_error;
+};
+
+/*
+ * Readdir actor. If this directory entry points to the scrub target inode,
+ * then the directory we're scanning (fpi->dp) is the parent of the scrub
+ * target inode, and we record its inode number in fpi->found_parent.
+ *
+ * Returns 0 to keep walking the directory and 1 to stop the walk. The
+ * corruption paths that return 1 store the real error in fpi->scan_error
+ * because the return value itself does not reach our caller. The
+ * termination check is handed &fpi->scan_error, presumably so that it can
+ * store its own reason there -- confirm against xchk_should_terminate().
+ */
+STATIC int
+xrep_findparent_dirent(
+ struct dir_context *dc,
+ const char *name,
+ int namelen,
+ loff_t pos,
+ u64 ino,
+ unsigned type)
+{
+ struct xrep_findparent_info *fpi;
+
+ fpi = container_of(dc, struct xrep_findparent_info, dc);
+
+ if (xchk_should_terminate(fpi->sc, &fpi->scan_error))
+ return 1;
+
+ if (ino != fpi->sc->ip->i_ino)
+ return 0;
+
+ /* Should never happen, but we want to bail out regardless. */
+ if (namelen == 0) {
+ fpi->scan_error = -EFSCORRUPTED;
+ return 1;
+ }
+
+ /*
+ * Ignore dotdot and dot entries -- we're looking for parent -> child
+ * links only.
+ */
+ if (name[0] == '.' && (namelen == 1 ||
+ (namelen == 2 && name[1] == '.')))
+ return 0;
+
+ /*
+ * Uhoh, more than one parent for a dir? A parent was already
+ * recorded, so emit the tracepoint with a parent inode number of
+ * zero and report corruption.
+ */
+ if (fpi->found_parent != NULLFSINO) {
+ trace_xrep_findparent_dirent(fpi->sc->ip, 0);
+ fpi->scan_error = -EFSCORRUPTED;
+ return 1;
+ }
+
+ /*
+ * We found a potential parent; remember this. Keep walking so that
+ * a second link to the child is caught by the check above.
+ */
+ trace_xrep_findparent_dirent(fpi->sc->ip, fpi->dp->i_ino);
+ fpi->found_parent = fpi->dp->i_ino;
+ return 0;
+}
+
+/*
+ * If this is a directory, walk the dirents looking for any that point to the
+ * scrub target inode.
+ *
+ * The caller sets fpi->dp to the directory to walk; both callers in this
+ * file check S_ISDIR before calling, this function does not. The walk runs
+ * with IOLOCK_SHARED held on fpi->dp, and every path through out_unlock
+ * drops that lock again.
+ *
+ * Returns 0 if the directory was skipped (it is the scrub target itself) or
+ * if the walk ran to completion; fpi->found_parent then says whether a
+ * matching entry was seen.
+ * Returns -ECANCELED with fpi->found_parent set to the child's current '..'
+ * value if we had to cycle locks and the '..' entry changed or the child was
+ * unlinked in the meantime.
+ * Returns -EFSCORRUPTED if fpi->dp is marked sick, or whatever error the
+ * readdir actor left in fpi->scan_error.
+ * Returns any other negative errno from locking, readahead, or readdir.
+ */
+STATIC int
+xrep_findparent_walk_directory(
+ struct xrep_findparent_info *fpi)
+{
+ struct xfs_scrub *sc = fpi->sc;
+ struct xfs_inode *dp = fpi->dp;
+ loff_t oldpos;
+ size_t bufsize;
+ unsigned int lock_mode;
+ int error = 0;
+
+ /* We can't point to ourselves. */
+ if (dp == sc->ip)
+ return 0;
+
+ /*
+ * If we can take the parent's lock then we're ready to scan.
+ *
+ * If we can't, release the lock on the child, and then try to lock the
+ * alleged parent and child at the same time. Use trylock for the
+ * second lock so that we don't ABBA deadlock the system.
+ *
+ * The child's '..' value is sampled before and after the relock so
+ * that we can tell whether it changed while the child was unlocked.
+ *
+ * NOTE(review): the error return below does not unlock dp, so it
+ * presumably relies on xchk_parent_lock_two_dirs() leaving dp
+ * unlocked on failure -- confirm against that helper.
+ */
+ if (!xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
+ xfs_ino_t orig_parent, new_parent;
+
+ orig_parent = xrep_dotdot_lookup(sc);
+
+ error = xchk_parent_lock_two_dirs(sc, dp);
+ if (error)
+ return error;
+
+ /*
+ * If the parent changed or the child was unlinked while the
+ * child directory was unlocked, we don't need to do anything
+ * further. Hand the current '..' value back through
+ * found_parent and let the callers turn -ECANCELED into
+ * success.
+ */
+ new_parent = xrep_dotdot_lookup(sc);
+ if (orig_parent != new_parent || VFS_I(sc->ip)->i_nlink == 0) {
+ fpi->found_parent = new_parent;
+ error = -ECANCELED;
+ goto out_unlock;
+ }
+ }
+
+ /*
+ * If this directory is known to be sick, we cannot scan it reliably
+ * and must abort.
+ */
+ if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
+ XFS_SICK_INO_BMBTD |
+ XFS_SICK_INO_DIR)) {
+ error = -EFSCORRUPTED;
+ goto out_unlock;
+ }
+
+ /*
+ * If there are any blocks, read-ahead block 0 as we're almost certain
+ * to have the next operation be a read there. This is how we
+ * guarantee that the directory's extent map has been loaded, if there
+ * is one.
+ */
+ lock_mode = xfs_ilock_data_map_shared(dp);
+ if (dp->i_df.if_nextents > 0)
+ error = xfs_dir3_data_readahead(dp, 0, 0);
+ xfs_iunlock(dp, lock_mode);
+ if (error)
+ goto out_unlock;
+
+ /*
+ * Scan the directory to see if it contains an entry pointing to
+ * the directory that we are repairing. Keep calling readdir until
+ * it fails, the actor records an error in scan_error, or the
+ * directory position stops advancing.
+ */
+ bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, dp->i_disk_size);
+ fpi->dc.pos = 0;
+ oldpos = 0;
+ while (true) {
+ error = xfs_readdir(sc->tp, dp, &fpi->dc, bufsize);
+ if (error)
+ break;
+ if (fpi->scan_error) {
+ error = fpi->scan_error;
+ break;
+ }
+ if (oldpos == fpi->dc.pos)
+ break;
+ oldpos = fpi->dc.pos;
+ }
+
+out_unlock:
+ xfs_iunlock(dp, XFS_IOLOCK_SHARED);
+ return error;
+}
+
+/*
+ * Confirm that the directory @parent_ino actually contains a directory entry
+ * pointing to the child @sc->ip->i_ino. This function can return in one of
+ * several ways:
+ *
+ * Returns 0 with @parent_ino set to the root directory's inode number if the
+ * child is the root directory or has a link count of zero.
+ * Returns 0 with @parent_ino unchanged if the parent was confirmed, or if
+ * the caller passed in NULLFSINO.
+ * Returns 0 with a different @parent_ino if we had to cycle inode locks to
+ * walk the alleged parent and the child's '..' entry was changed in the mean
+ * time.
+ * Returns 0 with @parent_ino set to NULLFSINO if the parent was not valid:
+ * it failed directory inode number verification, it is the child itself, it
+ * is not a directory, or it holds no entry pointing to the child.
+ * Returns the usual negative errno if something else happened; @parent_ino
+ * is not updated in that case.
+ */
+int
+xrep_parent_confirm(
+ struct xfs_scrub *sc,
+ xfs_ino_t *parent_ino)
+{
+ struct xrep_findparent_info fpi = {
+ .sc = sc,
+ .dc.actor = xrep_findparent_dirent,
+ .found_parent = NULLFSINO,
+ };
+ int error;
+
+ /*
+ * The root directory always points to itself. Unlinked dirs can point
+ * anywhere, so we point them at the root dir too.
+ */
+ if (sc->ip == sc->mp->m_rootip || VFS_I(sc->ip)->i_nlink == 0) {
+ *parent_ino = sc->mp->m_sb.sb_rootino;
+ return 0;
+ }
+
+ /* Reject garbage parent inode numbers and self-referential parents. */
+ if (*parent_ino == NULLFSINO)
+ return 0;
+ if (!xfs_verify_dir_ino(sc->mp, *parent_ino) ||
+ *parent_ino == sc->ip->i_ino) {
+ *parent_ino = NULLFSINO;
+ return 0;
+ }
+
+ error = xfs_iget(sc->mp, sc->tp, *parent_ino, XFS_IGET_UNTRUSTED, 0,
+ &fpi.dp);
+ if (error)
+ return error;
+
+ if (!S_ISDIR(VFS_I(fpi.dp)->i_mode)) {
+ *parent_ino = NULLFSINO;
+ goto out_rele;
+ }
+
+ error = xrep_findparent_walk_directory(&fpi);
+ if (error == -ECANCELED)
+ error = 0;
+ if (error)
+ goto out_rele;
+
+ *parent_ino = fpi.found_parent;
+out_rele:
+ xfs_irele(fpi.dp);
+ return error;
+}
+
+/*
+ * Scan the entire filesystem looking for a parent inode.
+ *
+ * Each inode handed back by the inode scanner is walked if it is a
+ * directory, then marked visited and released. An -EAGAIN from the scanner's
+ * iget moves on to the next iteration instead of failing the scan.
+ * NOTE(review): iget_tries and iget_retry_delay presumably bound how often
+ * and how quickly the scanner retries grabbing an inode -- confirm against
+ * scrub/iscan.h.
+ *
+ * Returns 0 with @parent_ino set to the parent that we found, or the current
+ * value of the child's '..' entry, if it changed when we had to drop the
+ * child's IOLOCK.
+ * Returns 0 with @parent_ino set to NULLFSINO if we didn't find anything.
+ * Returns the usual negative errno if something else happened, including
+ * -EFSCORRUPTED if more than one link to the child was found. @parent_ino
+ * is not updated on error.
+ */
+int
+xrep_parent_scan(
+ struct xfs_scrub *sc,
+ xfs_ino_t *parent_ino)
+{
+ struct xrep_findparent_info fpi = {
+ .sc = sc,
+ .dc.actor = xrep_findparent_dirent,
+ .found_parent = NULLFSINO,
+ };
+ struct xchk_iscan iscan = {
+ .iget_tries = 20,
+ .iget_retry_delay = HZ / 10,
+ };
+ int ret;
+
+ xchk_iscan_start(&iscan);
+ while ((ret = xchk_iscan_advance(sc, &iscan)) == 1) {
+ ret = xchk_iscan_iget(sc, &iscan, &fpi.dp);
+ if (ret == -EAGAIN)
+ continue;
+ if (ret)
+ break;
+
+ if (S_ISDIR(VFS_I(fpi.dp)->i_mode))
+ ret = xrep_findparent_walk_directory(&fpi);
+ xchk_iscan_mark_visited(&iscan, fpi.dp);
+ xfs_irele(fpi.dp);
+ if (ret)
+ break;
+
+ if (xchk_should_terminate(sc, &ret))
+ break;
+ }
+ xchk_iscan_finish(&iscan);
+
+ if (ret == -ECANCELED)
+ ret = 0;
+ if (ret)
+ return ret;
+
+ *parent_ino = fpi.found_parent;
+ return 0;
+}
+
+/*
+ * Repairing The Directory Parent Pointer
+ * ======================================
+ *
+ * Currently, only directories support parent pointers (in the form of '..'
+ * entries), so we simply scan the filesystem and update the '..' entry.
+ *
+ * Note that because the only parent pointer is the dotdot entry, we won't
+ * touch an unhealthy directory, since the directory repair code is perfectly
+ * capable of rebuilding a directory with the proper parent inode.
+ */
+
+/*
+ * Replace a directory's parent '..' pointer.
+ *
+ * Points the dotdot entry of sc->ip at @parent_ino using the scrub
+ * transaction sc->tp. The only caller in this file takes ILOCK_EXCL and
+ * joins sc->ip to the transaction before calling.
+ *
+ * Returns 0 on success, or a negative errno from either the additional
+ * space reservation or the directory replace operation.
+ */
+STATIC int
+xrep_parent_reset_dir(
+ struct xfs_scrub *sc,
+ xfs_ino_t parent_ino)
+{
+ unsigned int spaceres;
+ int error;
+
+ trace_xrep_parent_reset_dir(sc->ip, parent_ino);
+
+ /* Reserve more space just in case we have to expand the dir. */
+ spaceres = XFS_RENAME_SPACE_RES(sc->mp, 2);
+ error = xfs_trans_reserve_more_inode(sc->tp, sc->ip, spaceres, 0);
+ if (error)
+ return error;
+
+ /* Replace the dotdot entry. */
+ return xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot, parent_ino,
+ spaceres);
+}
+/*
+ * Repair the '..' entry of the directory sc->ip.
+ *
+ * Scans the whole filesystem for a directory that links to sc->ip and, if
+ * the current '..' entry does not already name that directory, rewrites it.
+ *
+ * Returns 0 if '..' was already correct or was replaced.
+ * Returns -EFSCORRUPTED if the directory is marked sick or no parent could
+ * be found.
+ * Returns the usual negative errno if something else happened.
+ */
+int
+xrep_parent(
+ struct xfs_scrub *sc)
+{
+ xfs_ino_t parent_ino, curr_parent;
+ unsigned int sick, checked;
+ int error;
+
+ /*
+ * Avoid sick directories. The parent pointer scrubber dropped the
+ * ILOCK and MMAPLOCK, but we still hold IOLOCK_EXCL on the directory.
+ * There shouldn't be anyone else clearing the directory's sick status.
+ * Only the sick mask is examined here; @checked is filled in but not
+ * otherwise used.
+ */
+ xfs_inode_measure_sickness(sc->ip, &sick, &checked);
+ if (sick & XFS_SICK_INO_DIR)
+ return -EFSCORRUPTED;
+
+ /* Scan the entire filesystem for a parent. */
+ error = xrep_parent_scan(sc, &parent_ino);
+ if (error)
+ return error;
+ if (parent_ino == NULLFSINO)
+ return -EFSCORRUPTED;
+
+ /* If the '..' entry is already set to the parent inode, we're done. */
+ curr_parent = xrep_dotdot_lookup(sc);
+ if (curr_parent != NULLFSINO && curr_parent == parent_ino)
+ return 0;
+
+ /* Re-take the ILOCK, we're going to need it to modify the dir. */
+ xchk_ilock(sc, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+
+ error = xrep_ino_dqattach(sc);
+ if (error)
+ return error;
+
+ return xrep_parent_reset_dir(sc, parent_ino);
+}