diff options
author | Darrick J. Wong <darrick.wong@oracle.com> | 2020-03-19 10:13:12 -0700 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2020-06-01 21:16:37 -0700 |
commit | 2490c4ceed3fcfd17afc53c2ddc1edc04034a26c (patch) | |
tree | abc07e1e770226ed99b7e6410534b0b4e6e964fe | |
parent | daad2af6e6130efb1f6568772d4f32d67ec95cb0 (diff) |
xfs: teach online directory repair to scan for the parent
Enhance the online directory repair code to try to scan for a
directory's parent if it doesn't find it while salvaging the directory
contents.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- | fs/xfs/Makefile | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/dir.c | 6 | ||||
-rw-r--r-- | fs/xfs/scrub/dir_repair.c | 130 | ||||
-rw-r--r-- | fs/xfs/scrub/parent.c | 3 | ||||
-rw-r--r-- | fs/xfs/scrub/parent.h | 18 | ||||
-rw-r--r-- | fs/xfs/scrub/parent_repair.c | 205 |
6 files changed, 359 insertions, 4 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 11c1dc33088b..b09730c1c69f 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -178,6 +178,7 @@ xfs-y += $(addprefix scrub/, \ fscounters_repair.o \ ialloc_repair.o \ inode_repair.o \ + parent_repair.o \ refcount_repair.o \ repair.o \ rmap_repair.o \ diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 53200aad0c4c..e318dd46cb15 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -28,6 +28,12 @@ xchk_setup_directory( unsigned int sz; int error; + if (sc->flags & XCHK_TRY_HARDER) { + error = xchk_fs_freeze(sc); + if (error) + return error; + } + error = xchk_setup_inode_contents(sc, ip, 0); if (error) return error; diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 041d25858dbc..a0b50631f822 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -24,6 +24,7 @@ #include "xfs_quota.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" +#include "xfs_iwalk.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -31,6 +32,7 @@ #include "scrub/repair.h" #include "scrub/array.h" #include "scrub/blob.h" +#include "scrub/parent.h" /* * Directory Repair @@ -682,6 +684,120 @@ xrep_dir_rebuild_tree( } /* + * If this directory entry points to the directory we're rebuilding, then the + * directory we're scanning is the parent. Remember the parent. + */ +STATIC int +xrep_dir_absorb_parent( + struct xfs_inode *dp, + struct xfs_name *name, + unsigned int dtype, + void *data) +{ + struct xrep_dir *rd = data; + int error = 0; + + /* Uhoh, more than one parent for a dir? */ + if (rd->parent_ino != NULLFSINO) + return -EFSCORRUPTED; + + if (xchk_should_terminate(rd->sc, &error)) + return error; + + /* We found a potential parent; remember this. */ + rd->parent_ino = dp->i_ino; + return 0; +} + +/* + * Make sure we return with a valid parent inode. + * + * If the directory salvaging step found a single '..' 
entry, check the + * alleged parent for a dentry pointing to the directory. If this succeeds, + * we're done. Otherwise, scan the entire filesystem for a parent. + */ +STATIC int +xrep_dir_validate_parent( + struct xrep_dir *rd) +{ + struct xfs_scrub *sc = rd->sc; + struct xfs_inode *parent; + xfs_nlink_t expected_nlink, nlink; + int error; + + /* + * If the directory salvage scan found no parent or found an obviously + * incorrect parent, jump to the filesystem scan. + * + * Otherwise, if the alleged parent seems plausible, scan the directory + * to make sure it really points to us. + */ + if (!xrep_parent_acceptable(sc, rd->parent_ino)) + goto scan; + + /* + * Grab this parent inode. Since we release the inode before we cancel + * the scrub transaction and don't know if releasing the inode will + * trigger eofblocks cleanup (which allocates what would be a nested + * transaction), we avoid DONTCACHE here. + */ + error = xfs_iget(sc->mp, sc->tp, rd->parent_ino, XFS_IGET_UNTRUSTED, 0, + &parent); + if (error) + goto scan; + if (!S_ISDIR(VFS_I(parent)->i_mode)) + goto rele_scan; + + /* + * We prefer to keep the inode locked while we lock and search its + * alleged parent for a forward reference. If we can grab the iolock, + * validate the pointers and we're done. We must use nowait here to + * avoid an ABBA deadlock on the parent and the child inodes. + */ + if (!xfs_ilock_nowait(parent, XFS_IOLOCK_SHARED)) + goto rele_scan; + + /* + * If we're an unlinked directory, the parent /won't/ have a link + * to us. Otherwise, it should have one link. + */ + expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; + + error = xchk_parent_count_parent_dentries(sc, parent, &nlink); + if (error) + goto unlock_rele_scan; + + /* The parent is an exact match, we're done. 
*/ + if (nlink == expected_nlink) { + xfs_iunlock(parent, XFS_IOLOCK_SHARED); + xfs_irele(parent); + return 0; + } + +unlock_rele_scan: + xfs_iunlock(parent, XFS_IOLOCK_SHARED); +rele_scan: + xfs_irele(parent); +scan: + /* + * If we're an unlinked directory, the parent /won't/ have a link + * to us. Set the parent directory to the root. + */ + if (VFS_I(rd->sc->ip)->i_nlink == 0) { + rd->parent_ino = sc->mp->m_sb.sb_rootino; + return 0; + } + + /* Scan the entire directory tree for the directory's parent. */ + error = xrep_scan_for_parents(sc, sc->ip->i_ino, + xrep_dir_absorb_parent, rd); + if (error) + return error; + + return rd->parent_ino == NULLFSINO ? -EFSCORRUPTED : 0; +} + +/* * Repair the directory metadata. * * XXX: Directory entry buffers can be multiple fsblocks in size. The buffer @@ -726,9 +842,17 @@ xrep_dir( if (error) goto out; - /* If we can't find the parent pointer, we're sunk. */ - if (rd.parent_ino == NULLFSINO) - return -EFSCORRUPTED; + /* + * Validate the parent pointer that we observed while salvaging the + * directory; or scan the filesystem to find one. We drop the ILOCK + * on the directory being repaired to avoid ABBA deadlocks, though we + * maintain the directory IOLOCK to prevent concurrent modifications. + */ + xfs_iunlock(sc->ip, XFS_ILOCK_EXCL); + error = xrep_dir_validate_parent(&rd); + xfs_ilock(sc->ip, XFS_ILOCK_EXCL); + if (error) + goto out; /* * Invalidate and truncate all data fork extents. This is the point at diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 5705adc43a75..c42ac90b6c99 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -16,6 +16,7 @@ #include "xfs_dir2_priv.h" #include "scrub/scrub.h" #include "scrub/common.h" +#include "scrub/parent.h" /* Set us up to scrub parents. */ int @@ -67,7 +68,7 @@ xchk_parent_actor( } /* Count the number of dentries in the parent dir that point to this inode. 
*/ -STATIC int +int xchk_parent_count_parent_dentries( struct xfs_scrub *sc, struct xfs_inode *parent, diff --git a/fs/xfs/scrub/parent.h b/fs/xfs/scrub/parent.h new file mode 100644 index 000000000000..6c79f7f99e9e --- /dev/null +++ b/fs/xfs/scrub/parent.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#ifndef __XFS_SCRUB_PARENT_H__ +#define __XFS_SCRUB_PARENT_H__ + +int xchk_parent_count_parent_dentries(struct xfs_scrub *sc, + struct xfs_inode *parent, xfs_nlink_t *nlink); + +typedef int (*xrep_parents_iter_fn)(struct xfs_inode *dp, struct xfs_name *name, + unsigned int dtype, void *data); +int xrep_scan_for_parents(struct xfs_scrub *sc, xfs_ino_t target_ino, + xrep_parents_iter_fn fn, void *data); +bool xrep_parent_acceptable(struct xfs_scrub *sc, xfs_ino_t ino); + +#endif /* __XFS_SCRUB_PARENT_H__ */ diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c new file mode 100644 index 000000000000..9c8cc7c2c206 --- /dev/null +++ b/fs/xfs/scrub/parent_repair.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020 Oracle. All Rights Reserved. + * Author: Darrick J. 
Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_trans_space.h" +#include "xfs_iwalk.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/parent.h" + +/* + * Scanning Directory Trees for Parent Pointers + * ============================================ + * + * Walk the inode table looking for directories. Scan each directory looking + * for directory entries that point to the target inode. Call a function on + * each match. + */ + +struct xrep_parents_scan { + /* Context for scanning all dentries in a directory. */ + struct dir_context dc; + void *data; + xrep_parents_iter_fn fn; + + /* Potential parent of the directory we're scanning. */ + xfs_ino_t *parent_ino; + + /* This is the inode for which we want to find the parent. */ + xfs_ino_t target_ino; + + /* Directory that we're scanning. */ + struct xfs_inode *scan_dir; + + /* Errors encountered during scanning. */ + int scan_error; +}; + +/* + * If this directory entry points to the directory we're rebuilding, then the + * directory we're scanning is the parent. Call our function. + * + * Note that the vfs readdir functions squash the nonzero codes that we return + * here into a "short" directory read, so the actual error codes are tracked + * and returned separately. 
+ */ +STATIC int +xrep_parents_scan_dentry( + struct dir_context *dc, + const char *name, + int namelen, + loff_t pos, + u64 ino, + unsigned type) +{ + struct xrep_parents_scan *rps; + + rps = container_of(dc, struct xrep_parents_scan, dc); + + if (ino == rps->target_ino) { + struct xfs_name xname = { .name = name, .len = namelen }; + + rps->scan_error = rps->fn(rps->scan_dir, &xname, type, + rps->data); + if (rps->scan_error) + return 1; + } + + return 0; +} + +/* Walk this directory's entries looking for any that point to the target. */ +STATIC int +xrep_parents_scan_inode( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t ino, + void *data) +{ + struct xrep_parents_scan *rps = data; + struct xfs_inode *dp; + loff_t oldpos; + size_t bufsize; + unsigned int lock_mode; + int locked; + int retries = 20; + int error; + + if (ino == rps->target_ino) + return 0; + + /* Grab inode and lock it so we can scan it. */ + error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, 0, &dp); + if (error) + return error; + + if (!S_ISDIR(VFS_I(dp)->i_mode)) + goto out_rele; + + /* + * Try a few times to take the directory IOLOCK. We have to use + * trylock here to avoid an ABBA deadlock with another thread that + * might have a parent locked and is asleep trying to lock our target. + * The solution for EDEADLOCK is usually to freeze the fs, so try a + * few times to get the inode to avoid that heavyweight solution. + */ + while (!(locked = xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) && --retries) + delay(HZ / 10); + if (!locked) { + error = -EDEADLOCK; + goto out_rele; + } + + /* + * If there are any blocks, read-ahead block 0 as we're almost certain + * to have the next operation be a read there. This is how we + * guarantee that the directory's extent map has been loaded, if there + * is one. 
+ */ + lock_mode = xfs_ilock_data_map_shared(dp); + if (dp->i_d.di_nextents > 0) + error = xfs_dir3_data_readahead(dp, 0, 0); + xfs_iunlock(dp, lock_mode); + if (error) + goto out_unlock; + + /* + * Scan the directory to see if it contains an entry pointing to + * the directory that we are repairing. + */ + rps->scan_dir = dp; + bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, dp->i_d.di_size); + oldpos = 0; + while (true) { + error = xfs_readdir(tp, dp, &rps->dc, bufsize); + if (error) + break; + if (rps->scan_error) { + error = rps->scan_error; + break; + } + if (oldpos == rps->dc.pos) + break; + oldpos = rps->dc.pos; + } + +out_unlock: + xfs_iunlock(dp, XFS_IOLOCK_SHARED); +out_rele: + xfs_irele(dp); + return error; +} + +/* Is this an acceptable parent for the inode we're scrubbing? */ +bool +xrep_parent_acceptable( + struct xfs_scrub *sc, + xfs_ino_t ino) +{ + return ino != NULLFSINO && ino != 0 && ino != sc->ip->i_ino && + xfs_verify_dir_ino(sc->mp, ino); +} + +/* + * Scan the directory tree to find the directory entries that point to this + * inode. + */ +int +xrep_scan_for_parents( + struct xfs_scrub *sc, + xfs_ino_t target_ino, + xrep_parents_iter_fn fn, + void *data) +{ + struct xrep_parents_scan rps = { + .dc.actor = xrep_parents_scan_dentry, + .data = data, + .fn = fn, + .target_ino = target_ino, + }; + + return xfs_iwalk(sc->mp, sc->tp, 0, 0, xrep_parents_scan_inode, 0, + &rps); +} |