summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2020-03-19 10:13:12 -0700
committerDarrick J. Wong <darrick.wong@oracle.com>2020-06-01 21:16:37 -0700
commit2490c4ceed3fcfd17afc53c2ddc1edc04034a26c (patch)
treeabc07e1e770226ed99b7e6410534b0b4e6e964fe
parentdaad2af6e6130efb1f6568772d4f32d67ec95cb0 (diff)
xfs: teach online directory repair to scan for the parent
Enhance the online directory repair code to try to scan for a directory's parent if it doesn't find it while salvaging the directory contents. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/scrub/dir.c6
-rw-r--r--fs/xfs/scrub/dir_repair.c130
-rw-r--r--fs/xfs/scrub/parent.c3
-rw-r--r--fs/xfs/scrub/parent.h18
-rw-r--r--fs/xfs/scrub/parent_repair.c205
6 files changed, 359 insertions, 4 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 11c1dc33088b..b09730c1c69f 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -178,6 +178,7 @@ xfs-y += $(addprefix scrub/, \
fscounters_repair.o \
ialloc_repair.o \
inode_repair.o \
+ parent_repair.o \
refcount_repair.o \
repair.o \
rmap_repair.o \
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 53200aad0c4c..e318dd46cb15 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -28,6 +28,12 @@ xchk_setup_directory(
unsigned int sz;
int error;
+ if (sc->flags & XCHK_TRY_HARDER) {
+ error = xchk_fs_freeze(sc);
+ if (error)
+ return error;
+ }
+
error = xchk_setup_inode_contents(sc, ip, 0);
if (error)
return error;
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index 041d25858dbc..a0b50631f822 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -24,6 +24,7 @@
#include "xfs_quota.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
+#include "xfs_iwalk.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -31,6 +32,7 @@
#include "scrub/repair.h"
#include "scrub/array.h"
#include "scrub/blob.h"
+#include "scrub/parent.h"
/*
* Directory Repair
@@ -682,6 +684,120 @@ xrep_dir_rebuild_tree(
}
/*
+ * If this directory entry points to the directory we're rebuilding, then the
+ * directory we're scanning is the parent. Remember the parent.
+ */
+STATIC int
+xrep_dir_absorb_parent(
+	struct xfs_inode	*dp,
+	struct xfs_name		*name,
+	unsigned int		dtype,
+	void			*data)
+{
+	struct xrep_dir		*rd = data;
+	int			ret = 0;
+
+	/*
+	 * A parent has already been recorded, so this would be a second
+	 * one.  Report that as corruption.
+	 */
+	if (rd->parent_ino != NULLFSINO)
+		return -EFSCORRUPTED;
+
+	/* No termination requested: record @dp as the candidate parent. */
+	if (!xchk_should_terminate(rd->sc, &ret)) {
+		rd->parent_ino = dp->i_ino;
+		return 0;
+	}
+
+	/* Termination requested; hand back the code it filled in. */
+	return ret;
+}
+
+/*
+ * Make sure we return with a valid parent inode.
+ *
+ * If the directory salvaging step found a single '..' entry, check the
+ * alleged parent for a dentry pointing to the directory.  If this succeeds,
+ * we're done.  Otherwise, scan the entire filesystem for a parent.
+ *
+ * Returns 0 with rd->parent_ino set to the validated parent (or to the root
+ * directory if the directory being repaired is unlinked); -EFSCORRUPTED if
+ * the filesystem scan turned up no parent or more than one; or whatever
+ * error the filesystem scan returned.
+ */
+STATIC int
+xrep_dir_validate_parent(
+	struct xrep_dir		*rd)
+{
+	struct xfs_scrub	*sc = rd->sc;
+	struct xfs_inode	*parent;
+	xfs_nlink_t		expected_nlink, nlink;
+	int			error;
+
+	/*
+	 * If the directory salvage scan found no parent or found an obviously
+	 * incorrect parent, jump to the filesystem scan.
+	 *
+	 * Otherwise, if the alleged parent seems plausible, scan the directory
+	 * to make sure it really points to us.
+	 */
+	if (!xrep_parent_acceptable(sc, rd->parent_ino))
+		goto scan;
+
+	/*
+	 * Grab this parent inode.  Since we release the inode before we cancel
+	 * the scrub transaction and don't know if releasing the inode will
+	 * trigger eofblocks cleanup (which allocates what would be a nested
+	 * transaction), we avoid DONTCACHE here.
+	 */
+	error = xfs_iget(sc->mp, sc->tp, rd->parent_ino, XFS_IGET_UNTRUSTED, 0,
+			&parent);
+	if (error)
+		goto scan;
+	if (!S_ISDIR(VFS_I(parent)->i_mode))
+		goto rele_scan;
+
+	/*
+	 * We prefer to keep the inode locked while we lock and search its
+	 * alleged parent for a forward reference.  If we can grab the iolock,
+	 * validate the pointers and we're done.  We must use nowait here to
+	 * avoid an ABBA deadlock on the parent and the child inodes.
+	 */
+	if (!xfs_ilock_nowait(parent, XFS_IOLOCK_SHARED))
+		goto rele_scan;
+
+	/*
+	 * If we're an unlinked directory, the parent /won't/ have a link
+	 * to us.  Otherwise, it should have one link.
+	 */
+	expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;
+
+	error = xchk_parent_count_parent_dentries(sc, parent, &nlink);
+	if (error)
+		goto unlock_rele_scan;
+
+	/* The parent is an exact match, we're done. */
+	if (nlink == expected_nlink) {
+		xfs_iunlock(parent, XFS_IOLOCK_SHARED);
+		xfs_irele(parent);
+		return 0;
+	}
+
+unlock_rele_scan:
+	xfs_iunlock(parent, XFS_IOLOCK_SHARED);
+rele_scan:
+	xfs_irele(parent);
+scan:
+	/*
+	 * If we're an unlinked directory, the parent /won't/ have a link
+	 * to us.  Set the parent directory to the root.
+	 */
+	if (VFS_I(sc->ip)->i_nlink == 0) {
+		rd->parent_ino = sc->mp->m_sb.sb_rootino;
+		return 0;
+	}
+
+	/*
+	 * Forget whatever candidate the salvage step left behind.  It either
+	 * was never plausible or just failed validation, and the scan callback
+	 * treats an already-set parent_ino as "second parent found".  Leaving
+	 * the stale value in place would make the first genuine match fail
+	 * with -EFSCORRUPTED, and would let an empty scan pass the check at
+	 * the bottom of this function with the bogus parent still recorded.
+	 */
+	rd->parent_ino = NULLFSINO;
+
+	/* Scan the entire directory tree for the directory's parent. */
+	error = xrep_scan_for_parents(sc, sc->ip->i_ino,
+			xrep_dir_absorb_parent, rd);
+	if (error)
+		return error;
+
+	return rd->parent_ino == NULLFSINO ? -EFSCORRUPTED : 0;
+}
+
+/*
* Repair the directory metadata.
*
* XXX: Directory entry buffers can be multiple fsblocks in size. The buffer
@@ -726,9 +842,17 @@ xrep_dir(
if (error)
goto out;
- /* If we can't find the parent pointer, we're sunk. */
- if (rd.parent_ino == NULLFSINO)
- return -EFSCORRUPTED;
+ /*
+ * Validate the parent pointer that we observed while salvaging the
+ * directory; or scan the filesystem to find one. We drop the ILOCK
+ * on the directory being repaired to avoid ABBA deadlocks, though we
+ * maintain the directory IOLOCK to prevent concurrent modifications.
+ */
+ xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
+ error = xrep_dir_validate_parent(&rd);
+ xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+ if (error)
+ goto out;
/*
* Invalidate and truncate all data fork extents. This is the point at
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 5705adc43a75..c42ac90b6c99 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -16,6 +16,7 @@
#include "xfs_dir2_priv.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
+#include "scrub/parent.h"
/* Set us up to scrub parents. */
int
@@ -67,7 +68,7 @@ xchk_parent_actor(
}
/* Count the number of dentries in the parent dir that point to this inode. */
-STATIC int
+int
xchk_parent_count_parent_dentries(
struct xfs_scrub *sc,
struct xfs_inode *parent,
diff --git a/fs/xfs/scrub/parent.h b/fs/xfs/scrub/parent.h
new file mode 100644
index 000000000000..6c79f7f99e9e
--- /dev/null
+++ b/fs/xfs/scrub/parent.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_SCRUB_PARENT_H__
+#define __XFS_SCRUB_PARENT_H__
+
+int xchk_parent_count_parent_dentries(struct xfs_scrub *sc,
+ struct xfs_inode *parent, xfs_nlink_t *nlink);
+
+typedef int (*xrep_parents_iter_fn)(struct xfs_inode *dp, struct xfs_name *name,
+ unsigned int dtype, void *data);
+int xrep_scan_for_parents(struct xfs_scrub *sc, xfs_ino_t target_ino,
+ xrep_parents_iter_fn fn, void *data);
+bool xrep_parent_acceptable(struct xfs_scrub *sc, xfs_ino_t ino);
+
+#endif /* __XFS_SCRUB_PARENT_H__ */
diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c
new file mode 100644
index 000000000000..9c8cc7c2c206
--- /dev/null
+++ b/fs/xfs/scrub/parent_repair.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_trans_space.h"
+#include "xfs_iwalk.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/parent.h"
+
+/*
+ * Scanning Directory Trees for Parent Pointers
+ * ============================================
+ *
+ * Walk the inode table looking for directories. Scan each directory looking
+ * for directory entries that point to the target inode. Call a function on
+ * each match.
+ */
+
+struct xrep_parents_scan {
+ /* Readdir context handed to xfs_readdir; the dentry actor recovers this struct from it. */
+ struct dir_context dc;
+ /* Opaque caller cookie, passed through unchanged as the last argument to fn. */
+ void *data;
+ /* Caller's callback, invoked for each dentry that points at target_ino. */
+ xrep_parents_iter_fn fn;
+
+ /* Potential parent of the directory we're scanning. */
+ /* NOTE(review): nothing in this file reads or writes this member -- confirm whether it is still needed. */
+ xfs_ino_t *parent_ino;
+
+ /* This is the inode for which we want to find the parent. */
+ xfs_ino_t target_ino;
+
+ /* Directory currently being read; passed to fn as the candidate parent. */
+ struct xfs_inode *scan_dir;
+
+ /* Nonzero return from fn, stashed here by the dentry actor for the readdir loop to pick up. */
+ int scan_error;
+};
+
+/*
+ * If this directory entry points to the directory we're rebuilding, then the
+ * directory we're scanning is the parent.  Call our function.
+ *
+ * The "." and ".." entries are never forwarded to the callback.  Every
+ * subdirectory of the target has a ".." entry whose inode number is the
+ * target's, so passing those along would misreport each child directory as
+ * a parent of the target.
+ *
+ * Note that the vfs readdir functions squash the nonzero codes that we return
+ * here into a "short" directory read, so the actual error codes are tracked
+ * and returned separately.
+ *
+ * Returns 0 to keep reading the directory, or 1 to stop because the callback
+ * failed; in that case the callback's return value is left in
+ * rps->scan_error.
+ */
+STATIC int
+xrep_parents_scan_dentry(
+	struct dir_context	*dc,
+	const char		*name,
+	int			namelen,
+	loff_t			pos,
+	u64			ino,
+	unsigned		type)
+{
+	struct xrep_parents_scan *rps;
+
+	rps = container_of(dc, struct xrep_parents_scan, dc);
+
+	/* Not a link to the inode we care about. */
+	if (ino != rps->target_ino)
+		return 0;
+
+	/* Only parent -> child links count; ignore dot and dotdot. */
+	if ((namelen == 1 && name[0] == '.') ||
+	    (namelen == 2 && name[0] == '.' && name[1] == '.'))
+		return 0;
+
+	{
+		struct xfs_name	xname = { .name = name, .len = namelen };
+
+		rps->scan_error = rps->fn(rps->scan_dir, &xname, type,
+				rps->data);
+		if (rps->scan_error)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk this directory's entries looking for any that point to the target.
+ *
+ * This is the per-inode callback handed to xfs_iwalk.  @data is the
+ * struct xrep_parents_scan shared by the whole walk.  The target inode
+ * itself and anything that is not a directory are skipped.
+ *
+ * Returns 0 on success (including "skipped"), -EDEADLOCK if the directory's
+ * IOLOCK could not be taken after several attempts, the error recorded by
+ * the dentry callback, or an error from xfs_iget, the readahead, or
+ * xfs_readdir.
+ */
+STATIC int
+xrep_parents_scan_inode(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	xfs_ino_t		ino,
+	void			*data)
+{
+	struct xrep_parents_scan *rps = data;
+	struct xfs_inode	*dp;
+	loff_t			oldpos;
+	size_t			bufsize;
+	unsigned int		lock_mode;
+	int			locked;
+	int			retries = 20;
+	int			error;
+
+	if (ino == rps->target_ino)
+		return 0;
+
+	/* Grab inode and lock it so we can scan it. */
+	error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, 0, &dp);
+	if (error)
+		return error;
+
+	if (!S_ISDIR(VFS_I(dp)->i_mode))
+		goto out_rele;
+
+	/*
+	 * Try a few times to take the directory IOLOCK.  We have to use
+	 * trylock here to avoid an ABBA deadlock with another thread that
+	 * might have a parent locked and is asleep trying to lock our target.
+	 * The solution for EDEADLOCK is usually to freeze the fs, so try a
+	 * few times to get the inode to avoid that heavyweight solution.
+	 */
+	while (!(locked = xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) && --retries)
+		delay(HZ / 10);
+	if (!locked) {
+		error = -EDEADLOCK;
+		goto out_rele;
+	}
+
+	/*
+	 * If there are any blocks, read-ahead block 0 as we're almost certain
+	 * to have the next operation be a read there.  This is how we
+	 * guarantee that the directory's extent map has been loaded, if there
+	 * is one.
+	 */
+	lock_mode = xfs_ilock_data_map_shared(dp);
+	if (dp->i_d.di_nextents > 0)
+		error = xfs_dir3_data_readahead(dp, 0, 0);
+	xfs_iunlock(dp, lock_mode);
+	if (error)
+		goto out_unlock;
+
+	/*
+	 * Scan the directory to see if it contains an entry pointing to the
+	 * directory that we are repairing.
+	 *
+	 * The readdir context lives in the scan structure and is reused for
+	 * every directory in the walk, so rewind it first.  Otherwise this
+	 * directory would be read starting from wherever the previous one
+	 * finished and entries before that offset would never be seen.
+	 */
+	rps->scan_dir = dp;
+	rps->scan_error = 0;
+	rps->dc.pos = 0;
+	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, dp->i_d.di_size);
+	oldpos = 0;
+	while (true) {
+		error = xfs_readdir(tp, dp, &rps->dc, bufsize);
+		if (error)
+			break;
+		if (rps->scan_error) {
+			error = rps->scan_error;
+			break;
+		}
+		/* No forward progress means we've hit the end of the dir. */
+		if (oldpos == rps->dc.pos)
+			break;
+		oldpos = rps->dc.pos;
+	}
+
+out_unlock:
+	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
+out_rele:
+	xfs_irele(dp);
+	return error;
+}
+
+/*
+ * Decide whether @ino could plausibly be the parent of the inode being
+ * scrubbed: it must be a real inode number, must not be the scrubbed inode
+ * itself, and must pass the directory inode number verifier.
+ */
+bool
+xrep_parent_acceptable(
+	struct xfs_scrub	*sc,
+	xfs_ino_t		ino)
+{
+	/* Null and zero inode numbers never name a parent. */
+	if (ino == NULLFSINO || ino == 0)
+		return false;
+
+	/* Reject the scrubbed inode as its own parent. */
+	if (ino == sc->ip->i_ino)
+		return false;
+
+	return xfs_verify_dir_ino(sc->mp, ino);
+}
+
+/*
+ * Walk every inode in the filesystem and, for each directory entry found
+ * that points at @target_ino, call @fn with @data.  Returns whatever the
+ * inode walk returns.
+ */
+int
+xrep_scan_for_parents(
+	struct xfs_scrub	*sc,
+	xfs_ino_t		target_ino,
+	xrep_parents_iter_fn	fn,
+	void			*data)
+{
+	/* Members not named here start out zeroed. */
+	struct xrep_parents_scan scan = {
+		.target_ino	= target_ino,
+		.fn		= fn,
+		.data		= data,
+		.dc.actor	= xrep_parents_scan_dentry,
+	};
+	int			error;
+
+	error = xfs_iwalk(sc->mp, sc->tp, 0, 0, xrep_parents_scan_inode, 0,
+			&scan);
+	return error;
+}