diff options
-rw-r--r-- | fs/xfs/Makefile | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_fs.h | 4 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_health.h | 4 | ||||
-rw-r--r-- | fs/xfs/scrub/common.h | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/dirtree.c | 985 | ||||
-rw-r--r-- | fs/xfs/scrub/dirtree.h | 178 | ||||
-rw-r--r-- | fs/xfs/scrub/dirtree_repair.c | 821 | ||||
-rw-r--r-- | fs/xfs/scrub/health.c | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/ino_bitmap.h | 37 | ||||
-rw-r--r-- | fs/xfs/scrub/orphanage.c | 6 | ||||
-rw-r--r-- | fs/xfs/scrub/orphanage.h | 8 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.h | 4 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.c | 7 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.h | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/stats.c | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.c | 4 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.h | 272 | ||||
-rw-r--r-- | fs/xfs/scrub/xfarray.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_health.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 1 |
21 files changed, 2337 insertions, 4 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index af99a455ce4d..d1ce1213797b 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -163,6 +163,7 @@ xfs-y += $(addprefix scrub/, \ common.o \ dabtree.o \ dir.o \ + dirtree.o \ fscounters.o \ health.o \ ialloc.o \ @@ -203,6 +204,7 @@ xfs-y += $(addprefix scrub/, \ bmap_repair.o \ cow_repair.o \ dir_repair.o \ + dirtree_repair.o \ findparent.o \ fscounters_repair.o \ ialloc_repair.o \ diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index dd13bfa500f2..7ae1912cdbfc 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -411,6 +411,7 @@ struct xfs_bulkstat { #define XFS_BS_SICK_XATTR (1 << 5) /* extended attributes */ #define XFS_BS_SICK_SYMLINK (1 << 6) /* symbolic link remote target */ #define XFS_BS_SICK_PARENT (1 << 7) /* parent pointers */ +#define XFS_BS_SICK_DIRTREE (1 << 8) /* directory tree structure */ /* * Project quota id helpers (previously projid was 16bit only @@ -719,9 +720,10 @@ struct xfs_scrub_metadata { #define XFS_SCRUB_TYPE_QUOTACHECK 25 /* quota counters */ #define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */ #define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */ +#define XFS_SCRUB_TYPE_DIRTREE 28 /* directory tree structure */ /* Number of scrub subcommands. */ -#define XFS_SCRUB_TYPE_NR 28 +#define XFS_SCRUB_TYPE_NR 29 /* i: Repair this metadata. */ #define XFS_SCRUB_IFLAG_REPAIR (1u << 0) diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index 3c64b5f9bd68..b0edb4288e59 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -95,6 +95,7 @@ struct xfs_da_args; /* Don't propagate sick status to ag health summary during inactivation */ #define XFS_SICK_INO_FORGET (1 << 12) +#define XFS_SICK_INO_DIRTREE (1 << 13) /* directory tree structure */ /* Primary evidence of health problems in a given group. */ #define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \ @@ -125,7 +126,8 @@ struct xfs_da_args; XFS_SICK_INO_DIR | \ XFS_SICK_INO_XATTR | \ XFS_SICK_INO_SYMLINK | \ - XFS_SICK_INO_PARENT) + XFS_SICK_INO_PARENT | \ + XFS_SICK_INO_DIRTREE) #define XFS_SICK_INO_ZAPPED (XFS_SICK_INO_BMBTD_ZAPPED | \ XFS_SICK_INO_BMBTA_ZAPPED | \ diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index e00466f40482..39465e39dc5f 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -92,6 +92,7 @@ int xchk_setup_directory(struct xfs_scrub *sc); int xchk_setup_xattr(struct xfs_scrub *sc); int xchk_setup_symlink(struct xfs_scrub *sc); int xchk_setup_parent(struct xfs_scrub *sc); +int xchk_setup_dirtree(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xchk_setup_rtbitmap(struct xfs_scrub *sc); int xchk_setup_rtsummary(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/dirtree.c b/fs/xfs/scrub/dirtree.c new file mode 100644 index 000000000000..bde58fb561ea --- /dev/null +++ b/fs/xfs/scrub/dirtree.c @@ -0,0 +1,985 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2023-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_attr.h" +#include "xfs_parent.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/bitmap.h" +#include "scrub/ino_bitmap.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/xfblob.h" +#include "scrub/listxattr.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/orphanage.h" +#include "scrub/dirtree.h" + +/* + * Directory Tree Structure Validation + * =================================== + * + * Validating the tree qualities of the directory tree structure can be + * difficult. If the tree is frozen, running a depth (or breadth) first search + * and marking a bitmap suffices to determine if there is a cycle. XORing the + * mark bitmap with the inode bitmap afterwards tells us if there are + * disconnected cycles. If the tree is not frozen, directory updates can move + * subtrees across the scanner wavefront, which complicates the design greatly. + * + * Directory parent pointers change that by enabling an incremental approach to + * validation of the tree structure. Instead of using one thread to scan the + * entire filesystem, we instead can have multiple threads walking individual + * subdirectories upwards to the root. In a perfect world, the IOLOCK would + * suffice to stabilize two directories in a parent -> child relationship. + * Unfortunately, the VFS does not take the IOLOCK when moving a child + * subdirectory, so we instead synchronize on ILOCK and use dirent update hooks + * to detect a race. If a race occurs in a path, we restart the scan. + * + * If the walk terminates without reaching the root, we know the path is + * disconnected and ought to be attached to the lost and found. If on the walk + * we find the same subdir that we're scanning, we know this is a cycle and + * should delete an incoming edge. If we find multiple paths to the root, we + * know to delete an incoming edge. + * + * There are two big hitches with this approach: first, all file link counts + * must be correct to prevent other writers from doing the wrong thing with the + * directory tree structure. Second, because we're walking upwards in a tree + * of arbitrary depth, we cannot hold all the ILOCKs. Instead, we will use a + * directory update hook to invalidate the scan results if one of the paths + * we've scanned has changed. + */ + +/* Clean up the dirtree checking resources. */ +STATIC void +xchk_dirtree_buf_cleanup( + void *buf) +{ + struct xchk_dirtree *dl = buf; + struct xchk_dirpath *path, *n; + + if (dl->scan_ino != NULLFSINO) + xfs_dir_hook_del(dl->sc->mp, &dl->dhook); + + xchk_dirtree_for_each_path_safe(dl, path, n) { + list_del_init(&path->list); + xino_bitmap_destroy(&path->seen_inodes); + kfree(path); + } + + xfblob_destroy(dl->path_names); + xfarray_destroy(dl->path_steps); + mutex_destroy(&dl->lock); +} + +/* Set us up to look for directory loops. */ +int +xchk_setup_dirtree( + struct xfs_scrub *sc) +{ + struct xchk_dirtree *dl; + char *descr; + int error; + + xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); + + if (xchk_could_repair(sc)) { + error = xrep_setup_dirtree(sc); + if (error) + return error; + } + + dl = kvzalloc(sizeof(struct xchk_dirtree), XCHK_GFP_FLAGS); + if (!dl) + return -ENOMEM; + dl->sc = sc; + dl->xname.name = dl->namebuf; + dl->hook_xname.name = dl->hook_namebuf; + INIT_LIST_HEAD(&dl->path_list); + dl->root_ino = NULLFSINO; + dl->scan_ino = NULLFSINO; + dl->parent_ino = NULLFSINO; + + mutex_init(&dl->lock); + + descr = xchk_xfile_ino_descr(sc, "dirtree path steps"); + error = xfarray_create(descr, 0, sizeof(struct xchk_dirpath_step), + &dl->path_steps); + kfree(descr); + if (error) + goto out_dl; + + descr = xchk_xfile_ino_descr(sc, "dirtree path names"); + error = xfblob_create(descr, &dl->path_names); + kfree(descr); + if (error) + goto out_steps; + + error = xchk_setup_inode_contents(sc, 0); + if (error) + goto out_names; + + sc->buf = dl; + sc->buf_cleanup = xchk_dirtree_buf_cleanup; + return 0; + +out_names: + xfblob_destroy(dl->path_names); +out_steps: + xfarray_destroy(dl->path_steps); +out_dl: + mutex_destroy(&dl->lock); + kvfree(dl); + return error; +} + +/* + * Add the parent pointer described by @dl->pptr to the given path as a new + * step. Returns -ELNRNG if the path is too deep. + */ +int +xchk_dirpath_append( + struct xchk_dirtree *dl, + struct xfs_inode *ip, + struct xchk_dirpath *path, + const struct xfs_name *name, + const struct xfs_parent_rec *pptr) +{ + struct xchk_dirpath_step step = { + .pptr_rec = *pptr, /* struct copy */ + .name_len = name->len, + }; + int error; + + /* + * If this path is more than 2 billion steps long, this directory tree + * is too far gone to fix. + */ + if (path->nr_steps >= XFS_MAXLINK) + return -ELNRNG; + + error = xfblob_storename(dl->path_names, &step.name_cookie, name); + if (error) + return error; + + error = xino_bitmap_set(&path->seen_inodes, ip->i_ino); + if (error) + return error; + + error = xfarray_append(dl->path_steps, &step); + if (error) + return error; + + path->nr_steps++; + return 0; +} + +/* + * Create an xchk_path for each parent pointer of the directory that we're + * scanning. For each path created, we will eventually try to walk towards the + * root with the goal of deleting all parents except for one that leads to the + * root. + * + * Returns -EFSCORRUPTED to signal that the inode being scanned has a corrupt + * parent pointer and hence there's no point in continuing; or -ENOSR if there + * are too many parent pointers for this directory. + */ +STATIC int +xchk_dirtree_create_path( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) +{ + struct xfs_name xname = { + .name = name, + .len = namelen, + }; + struct xchk_dirtree *dl = priv; + struct xchk_dirpath *path; + const struct xfs_parent_rec *rec = value; + int error; + + if (!(attr_flags & XFS_ATTR_PARENT)) + return 0; + + error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value, + valuelen, NULL, NULL); + if (error) + return error; + + /* + * If there are more than 2 billion actual parent pointers for this + * subdirectory, this fs is too far gone to fix. + */ + if (dl->nr_paths >= XFS_MAXLINK) + return -ENOSR; + + trace_xchk_dirtree_create_path(sc, ip, dl->nr_paths, &xname, rec); + + /* + * Create a new xchk_path structure to remember this parent pointer + * and record the first name step. + */ + path = kmalloc(sizeof(struct xchk_dirpath), XCHK_GFP_FLAGS); + if (!path) + return -ENOMEM; + + INIT_LIST_HEAD(&path->list); + xino_bitmap_init(&path->seen_inodes); + path->nr_steps = 0; + path->outcome = XCHK_DIRPATH_SCANNING; + + error = xchk_dirpath_append(dl, sc->ip, path, &xname, rec); + if (error) + goto out_path; + + path->first_step = xfarray_length(dl->path_steps) - 1; + path->second_step = XFARRAY_NULLIDX; + path->path_nr = dl->nr_paths; + + list_add_tail(&path->list, &dl->path_list); + dl->nr_paths++; + return 0; +out_path: + kfree(path); + return error; +} + +/* + * Validate that the first step of this path still has a corresponding + * parent pointer in @sc->ip. We probably dropped @sc->ip's ILOCK while + * walking towards the roots, which is why this is necessary. + * + * This function has a side effect of loading the first parent pointer of this + * path into the parent pointer scratch pad. This prepares us to walk up the + * directory tree towards the root. Returns -ESTALE if the scan data is now + * out of date. + */ +STATIC int +xchk_dirpath_revalidate( + struct xchk_dirtree *dl, + struct xchk_dirpath *path) +{ + struct xfs_scrub *sc = dl->sc; + int error; + + /* + * Look up the parent pointer that corresponds to the start of this + * path. If the parent pointer has disappeared on us, dump all the + * scan results and try again. + */ + error = xfs_parent_lookup(sc->tp, sc->ip, &dl->xname, &dl->pptr_rec, + &dl->pptr_args); + if (error == -ENOATTR) { + trace_xchk_dirpath_disappeared(dl->sc, sc->ip, path->path_nr, + path->first_step, &dl->xname, &dl->pptr_rec); + dl->stale = true; + return -ESTALE; + } + + return error; +} + +/* + * Walk the parent pointers of a directory at the end of a path and record + * the parent that we find in @dl->xname/pptr_rec. + */ +STATIC int +xchk_dirpath_find_next_step( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) +{ + struct xchk_dirtree *dl = priv; + const struct xfs_parent_rec *rec = value; + int error; + + if (!(attr_flags & XFS_ATTR_PARENT)) + return 0; + + error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value, + valuelen, NULL, NULL); + if (error) + return error; + + /* + * If we've already set @dl->pptr_rec, then this directory has multiple + * parents. Signal this back to the caller via -EMLINK. + */ + if (dl->parents_found > 0) + return -EMLINK; + + dl->parents_found++; + memcpy(dl->namebuf, name, namelen); + dl->xname.len = namelen; + dl->pptr_rec = *rec; /* struct copy */ + return 0; +} + +/* Set and log the outcome of a path walk. */ +static inline void +xchk_dirpath_set_outcome( + struct xchk_dirtree *dl, + struct xchk_dirpath *path, + enum xchk_dirpath_outcome outcome) +{ + trace_xchk_dirpath_set_outcome(dl->sc, path->path_nr, path->nr_steps, + outcome); + + path->outcome = outcome; +} + +/* + * Scan the directory at the end of this path for its parent directory link. + * If we find one, extend the path. Returns -ESTALE if the scan data out of + * date. Returns -EFSCORRUPTED if the parent pointer is bad; or -ELNRNG if + * the path got too deep. + */ +STATIC int +xchk_dirpath_step_up( + struct xchk_dirtree *dl, + struct xchk_dirpath *path) +{ + struct xfs_scrub *sc = dl->sc; + struct xfs_inode *dp; + xfs_ino_t parent_ino = be64_to_cpu(dl->pptr_rec.p_ino); + unsigned int lock_mode; + int error; + + /* Grab and lock the parent directory. */ + error = xchk_iget(sc, parent_ino, &dp); + if (error) + return error; + + lock_mode = xfs_ilock_attr_map_shared(dp); + mutex_lock(&dl->lock); + + if (dl->stale) { + error = -ESTALE; + goto out_scanlock; + } + + /* We've reached the root directory; the path is ok. */ + if (parent_ino == dl->root_ino) { + xchk_dirpath_set_outcome(dl, path, XCHK_DIRPATH_OK); + error = 0; + goto out_scanlock; + } + + /* + * The inode being scanned is its own distant ancestor! Get rid of + * this path. + */ + if (parent_ino == sc->ip->i_ino) { + xchk_dirpath_set_outcome(dl, path, XCHK_DIRPATH_DELETE); + error = 0; + goto out_scanlock; + } + + /* + * We've seen this inode before during the path walk. There's a loop + * above us in the directory tree. This probably means that we cannot + * continue, but let's keep walking paths to get a full picture. + */ + if (xino_bitmap_test(&path->seen_inodes, parent_ino)) { + xchk_dirpath_set_outcome(dl, path, XCHK_DIRPATH_LOOP); + error = 0; + goto out_scanlock; + } + + /* The handle encoded in the parent pointer must match. */ + if (VFS_I(dp)->i_generation != be32_to_cpu(dl->pptr_rec.p_gen)) { + trace_xchk_dirpath_badgen(dl->sc, dp, path->path_nr, + path->nr_steps, &dl->xname, &dl->pptr_rec); + error = -EFSCORRUPTED; + goto out_scanlock; + } + + /* Parent pointer must point up to a directory. */ + if (!S_ISDIR(VFS_I(dp)->i_mode)) { + trace_xchk_dirpath_nondir_parent(dl->sc, dp, path->path_nr, + path->nr_steps, &dl->xname, &dl->pptr_rec); + error = -EFSCORRUPTED; + goto out_scanlock; + } + + /* Parent cannot be an unlinked directory. */ + if (VFS_I(dp)->i_nlink == 0) { + trace_xchk_dirpath_unlinked_parent(dl->sc, dp, path->path_nr, + path->nr_steps, &dl->xname, &dl->pptr_rec); + error = -EFSCORRUPTED; + goto out_scanlock; + } + + /* + * If the extended attributes look as though they has been zapped by + * the inode record repair code, we cannot scan for parent pointers. + */ + if (xchk_pptr_looks_zapped(dp)) { + error = -EBUSY; + xchk_set_incomplete(sc); + goto out_scanlock; + } + + /* + * Walk the parent pointers of @dp to find the parent of this directory + * to find the next step in our walk. If we find that @dp has exactly + * one parent, the parent pointer information will be stored in + * @dl->pptr_rec. This prepares us for the next step of the walk. + */ + mutex_unlock(&dl->lock); + dl->parents_found = 0; + error = xchk_xattr_walk(sc, dp, xchk_dirpath_find_next_step, NULL, dl); + mutex_lock(&dl->lock); + if (error == -EFSCORRUPTED || error == -EMLINK || + (!error && dl->parents_found == 0)) { + /* + * Further up the directory tree from @sc->ip, we found a + * corrupt parent pointer, multiple parent pointers while + * finding this directory's parent, or zero parents despite + * having a nonzero link count. Keep looking for other paths. + */ + xchk_dirpath_set_outcome(dl, path, XCHK_DIRPATH_CORRUPT); + error = 0; + goto out_scanlock; + } + if (error) + goto out_scanlock; + + if (dl->stale) { + error = -ESTALE; + goto out_scanlock; + } + + trace_xchk_dirpath_found_next_step(sc, dp, path->path_nr, + path->nr_steps, &dl->xname, &dl->pptr_rec); + + /* Append to the path steps */ + error = xchk_dirpath_append(dl, dp, path, &dl->xname, &dl->pptr_rec); + if (error) + goto out_scanlock; + + if (path->second_step == XFARRAY_NULLIDX) + path->second_step = xfarray_length(dl->path_steps) - 1; + +out_scanlock: + mutex_unlock(&dl->lock); + xfs_iunlock(dp, lock_mode); + xchk_irele(sc, dp); + return error; +} + +/* + * Walk the directory tree upwards towards what is hopefully the root + * directory, recording path steps as we go. The current path components are + * stored in dl->pptr_rec and dl->xname. + * + * Returns -ESTALE if the scan data are out of date. Returns -EFSCORRUPTED + * only if the direct parent pointer of @sc->ip associated with this path is + * corrupt. + */ +STATIC int +xchk_dirpath_walk_upwards( + struct xchk_dirtree *dl, + struct xchk_dirpath *path) +{ + struct xfs_scrub *sc = dl->sc; + int error; + + ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL); + + /* Reload the start of this path and make sure it's still there. */ + error = xchk_dirpath_revalidate(dl, path); + if (error) + return error; + + trace_xchk_dirpath_walk_upwards(sc, sc->ip, path->path_nr, &dl->xname, + &dl->pptr_rec); + + /* + * The inode being scanned is its own direct ancestor! + * Get rid of this path. + */ + if (be64_to_cpu(dl->pptr_rec.p_ino) == sc->ip->i_ino) { + xchk_dirpath_set_outcome(dl, path, XCHK_DIRPATH_DELETE); + return 0; + } + + /* + * Drop ILOCK_EXCL on the inode being scanned. We still hold + * IOLOCK_EXCL on it, so it cannot move around or be renamed. + * + * Beyond this point we're walking up the directory tree, which means + * that we can acquire and drop the ILOCK on an alias of sc->ip. The + * ILOCK state is no longer tracked in the scrub context. Hence we + * must drop @sc->ip's ILOCK during the walk. + */ + mutex_unlock(&dl->lock); + xchk_iunlock(sc, XFS_ILOCK_EXCL); + + /* + * Take the first step in the walk towards the root by checking the + * start of this path, which is a direct parent pointer of @sc->ip. + * If we see any kind of error here (including corruptions), the parent + * pointer of @sc->ip is corrupt. Stop the whole scan. + */ + error = xchk_dirpath_step_up(dl, path); + if (error) { + xchk_ilock(sc, XFS_ILOCK_EXCL); + mutex_lock(&dl->lock); + return error; + } + + /* + * Take steps upward from the second step in this path towards the + * root. If we hit corruption errors here, there's a problem + * *somewhere* in the path, but we don't need to stop scanning. + */ + while (!error && path->outcome == XCHK_DIRPATH_SCANNING) + error = xchk_dirpath_step_up(dl, path); + + /* Retake the locks we had, mark paths, etc. */ + xchk_ilock(sc, XFS_ILOCK_EXCL); + mutex_lock(&dl->lock); + if (error == -EFSCORRUPTED) { + xchk_dirpath_set_outcome(dl, path, XCHK_DIRPATH_CORRUPT); + error = 0; + } + if (!error && dl->stale) + return -ESTALE; + return error; +} + +/* + * Decide if this path step has been touched by this live update. Returns + * 1 for yes, 0 for no, or a negative errno. + */ +STATIC int +xchk_dirpath_step_is_stale( + struct xchk_dirtree *dl, + struct xchk_dirpath *path, + unsigned int step_nr, + xfarray_idx_t step_idx, + struct xfs_dir_update_params *p, + xfs_ino_t *cursor) +{ + struct xchk_dirpath_step step; + xfs_ino_t child_ino = *cursor; + int error; + + error = xfarray_load(dl->path_steps, step_idx, &step); + if (error) + return error; + *cursor = be64_to_cpu(step.pptr_rec.p_ino); + + /* + * If the parent and child being updated are not the ones mentioned in + * this path step, the scan data is still ok. + */ + if (p->ip->i_ino != child_ino || p->dp->i_ino != *cursor) + return 0; + + /* + * If the dirent name lengths or byte sequences are different, the scan + * data is still ok. + */ + if (p->name->len != step.name_len) + return 0; + + error = xfblob_loadname(dl->path_names, step.name_cookie, + &dl->hook_xname, step.name_len); + if (error) + return error; + + if (memcmp(dl->hook_xname.name, p->name->name, p->name->len) != 0) + return 0; + + /* + * If the update comes from the repair code itself, walk the state + * machine forward. + */ + if (p->ip->i_ino == dl->scan_ino && + path->outcome == XREP_DIRPATH_ADOPTING) { + xchk_dirpath_set_outcome(dl, path, XREP_DIRPATH_ADOPTED); + return 0; + } + + if (p->ip->i_ino == dl->scan_ino && + path->outcome == XREP_DIRPATH_DELETING) { + xchk_dirpath_set_outcome(dl, path, XREP_DIRPATH_DELETED); + return 0; + } + + /* Exact match, scan data is out of date. */ + trace_xchk_dirpath_changed(dl->sc, path->path_nr, step_nr, p->dp, + p->ip, p->name); + return 1; +} + +/* + * Decide if this path has been touched by this live update. Returns 1 for + * yes, 0 for no, or a negative errno. + */ +STATIC int +xchk_dirpath_is_stale( + struct xchk_dirtree *dl, + struct xchk_dirpath *path, + struct xfs_dir_update_params *p) +{ + xfs_ino_t cursor = dl->scan_ino; + xfarray_idx_t idx = path->first_step; + unsigned int i; + int ret; + + /* + * The child being updated has not been seen by this path at all; this + * path cannot be stale. + */ + if (!xino_bitmap_test(&path->seen_inodes, p->ip->i_ino)) + return 0; + + ret = xchk_dirpath_step_is_stale(dl, path, 0, idx, p, &cursor); + if (ret != 0) + return ret; + + for (i = 1, idx = path->second_step; i < path->nr_steps; i++, idx++) { + ret = xchk_dirpath_step_is_stale(dl, path, i, idx, p, &cursor); + if (ret != 0) + return ret; + } + + return 0; +} + +/* + * Decide if a directory update from the regular filesystem touches any of the + * paths we've scanned, and invalidate the scan data if true. + */ +STATIC int +xchk_dirtree_live_update( + struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct xfs_dir_update_params *p = data; + struct xchk_dirtree *dl; + struct xchk_dirpath *path; + int ret; + + dl = container_of(nb, struct xchk_dirtree, dhook.dirent_hook.nb); + + trace_xchk_dirtree_live_update(dl->sc, p->dp, action, p->ip, p->delta, + p->name); + + mutex_lock(&dl->lock); + + if (dl->stale || dl->aborted) + goto out_unlock; + + xchk_dirtree_for_each_path(dl, path) { + ret = xchk_dirpath_is_stale(dl, path, p); + if (ret < 0) { + dl->aborted = true; + break; + } + if (ret == 1) { + dl->stale = true; + break; + } + } + +out_unlock: + mutex_unlock(&dl->lock); + return NOTIFY_DONE; +} + +/* Delete all the collected path information. */ +STATIC void +xchk_dirtree_reset( + void *buf) +{ + struct xchk_dirtree *dl = buf; + struct xchk_dirpath *path, *n; + + ASSERT(dl->sc->ilock_flags & XFS_ILOCK_EXCL); + + xchk_dirtree_for_each_path_safe(dl, path, n) { + list_del_init(&path->list); + xino_bitmap_destroy(&path->seen_inodes); + kfree(path); + } + dl->nr_paths = 0; + + xfarray_truncate(dl->path_steps); + xfblob_truncate(dl->path_names); + + dl->stale = false; +} + +/* + * Load the name/pptr from the first step in this path into @dl->pptr_rec and + * @dl->xname. + */ +STATIC int +xchk_dirtree_load_path( + struct xchk_dirtree *dl, + struct xchk_dirpath *path) +{ + struct xchk_dirpath_step step; + int error; + + error = xfarray_load(dl->path_steps, path->first_step, &step); + if (error) + return error; + + error = xfblob_loadname(dl->path_names, step.name_cookie, &dl->xname, + step.name_len); + if (error) + return error; + + dl->pptr_rec = step.pptr_rec; /* struct copy */ + return 0; +} + +/* + * For each parent pointer of this subdir, trace a path upwards towards the + * root directory and record what we find. Returns 0 for success; + * -EFSCORRUPTED if walking the parent pointers of @sc->ip failed, -ELNRNG if a + * path was too deep; -ENOSR if there were too many parent pointers; or + * a negative errno. + */ +int +xchk_dirtree_find_paths_to_root( + struct xchk_dirtree *dl) +{ + struct xfs_scrub *sc = dl->sc; + struct xchk_dirpath *path; + int error = 0; + + do { + if (xchk_should_terminate(sc, &error)) + return error; + + xchk_dirtree_reset(dl); + + /* + * If the extended attributes look as though they has been + * zapped by the inode record repair code, we cannot scan for + * parent pointers. + */ + if (xchk_pptr_looks_zapped(sc->ip)) { + xchk_set_incomplete(sc); + return -EBUSY; + } + + /* + * Create path walk contexts for each parent of the directory + * that is being scanned. Directories are supposed to have + * only one parent, but this is how we detect multiple parents. + */ + error = xchk_xattr_walk(sc, sc->ip, xchk_dirtree_create_path, + NULL, dl); + if (error) + return error; + + xchk_dirtree_for_each_path(dl, path) { + /* Load path components into dl->pptr/xname */ + error = xchk_dirtree_load_path(dl, path); + if (error) + return error; + + /* + * Try to walk up each path to the root. This enables + * us to find directory loops in ancestors, and the + * like. + */ + error = xchk_dirpath_walk_upwards(dl, path); + if (error == -EFSCORRUPTED) { + /* + * A parent pointer of @sc->ip is bad, don't + * bother continuing. + */ + break; + } + if (error == -ESTALE) { + /* This had better be an invalidation. */ + ASSERT(dl->stale); + break; + } + if (error) + return error; + if (dl->aborted) + return 0; + } + } while (dl->stale); + + return error; +} + +/* + * Figure out what to do with the paths we tried to find. Do not call this + * if the scan results are stale. + */ +void +xchk_dirtree_evaluate( + struct xchk_dirtree *dl, + struct xchk_dirtree_outcomes *oc) +{ + struct xchk_dirpath *path; + + ASSERT(!dl->stale); + + /* Scan the paths we have to decide what to do. */ + memset(oc, 0, sizeof(struct xchk_dirtree_outcomes)); + xchk_dirtree_for_each_path(dl, path) { + trace_xchk_dirpath_evaluate_path(dl->sc, path->path_nr, + path->nr_steps, path->outcome); + + switch (path->outcome) { + case XCHK_DIRPATH_SCANNING: + /* shouldn't get here */ + ASSERT(0); + break; + case XCHK_DIRPATH_DELETE: + /* This one is already going away. */ + oc->bad++; + break; + case XCHK_DIRPATH_CORRUPT: + case XCHK_DIRPATH_LOOP: + /* Couldn't find the end of this path. */ + oc->suspect++; + break; + case XCHK_DIRPATH_STALE: + /* shouldn't get here either */ + ASSERT(0); + break; + case XCHK_DIRPATH_OK: + /* This path got all the way to the root. */ + oc->good++; + break; + case XREP_DIRPATH_DELETING: + case XREP_DIRPATH_DELETED: + case XREP_DIRPATH_ADOPTING: + case XREP_DIRPATH_ADOPTED: + /* These should not be in progress! */ + ASSERT(0); + break; + } + } + + trace_xchk_dirtree_evaluate(dl, oc); +} + +/* Look for directory loops. */ +int +xchk_dirtree( + struct xfs_scrub *sc) +{ + struct xchk_dirtree_outcomes oc; + struct xchk_dirtree *dl = sc->buf; + int error; + + /* + * Nondirectories do not point downwards to other files, so they cannot + * cause a cycle in the directory tree. + */ + if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) + return -ENOENT; + + ASSERT(xfs_has_parent(sc->mp)); + + /* + * Find the root of the directory tree. Remember which directory to + * scan, because the hook doesn't detach until after sc->ip gets + * released during teardown. + */ + dl->root_ino = sc->mp->m_rootip->i_ino; + dl->scan_ino = sc->ip->i_ino; + + trace_xchk_dirtree_start(sc->ip, sc->sm, 0); + + /* + * Hook into the directory entry code so that we can capture updates to + * paths that we have already scanned. The scanner thread takes each + * directory's ILOCK, which means that any in-progress directory update + * will finish before we can scan the directory. + */ + ASSERT(sc->flags & XCHK_FSGATES_DIRENTS); + xfs_dir_hook_setup(&dl->dhook, xchk_dirtree_live_update); + error = xfs_dir_hook_add(sc->mp, &dl->dhook); + if (error) + goto out; + + mutex_lock(&dl->lock); + + /* Trace each parent pointer's path to the root. */ + error = xchk_dirtree_find_paths_to_root(dl); + if (error == -EFSCORRUPTED || error == -ELNRNG || error == -ENOSR) { + /* + * Don't bother walking the paths if the xattr structure or the + * parent pointers are corrupt; this scan cannot be completed + * without full information. + */ + xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); + error = 0; + goto out_scanlock; + } + if (error == -EBUSY) { + /* + * We couldn't scan some directory's parent pointers because + * the attr fork looked like it had been zapped. The + * scan was marked incomplete, so no further error code + * is necessary. + */ + error = 0; + goto out_scanlock; + } + if (error) + goto out_scanlock; + if (dl->aborted) { + xchk_set_incomplete(sc); + goto out_scanlock; + } + + /* Assess what we found in our path evaluation. */ + xchk_dirtree_evaluate(dl, &oc); + if (xchk_dirtree_parentless(dl)) { + if (oc.good || oc.bad || oc.suspect) + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + } else { + if (oc.bad || oc.good + oc.suspect != 1) + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + if (oc.suspect) + xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); + } + +out_scanlock: + mutex_unlock(&dl->lock); +out: + trace_xchk_dirtree_done(sc->ip, sc->sm, error); + return error; +} diff --git a/fs/xfs/scrub/dirtree.h b/fs/xfs/scrub/dirtree.h new file mode 100644 index 000000000000..1e1686365c61 --- /dev/null +++ b/fs/xfs/scrub/dirtree.h @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2023-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_SCRUB_DIRTREE_H__ +#define __XFS_SCRUB_DIRTREE_H__ + +/* + * Each of these represents one parent pointer path step in a chain going + * up towards the directory tree root. These are stored inside an xfarray. + */ +struct xchk_dirpath_step { + /* Directory entry name associated with this parent link. */ + xfblob_cookie name_cookie; + unsigned int name_len; + + /* Handle of the parent directory. */ + struct xfs_parent_rec pptr_rec; +}; + +enum xchk_dirpath_outcome { + XCHK_DIRPATH_SCANNING = 0, /* still being put together */ + XCHK_DIRPATH_DELETE, /* delete this path */ + XCHK_DIRPATH_CORRUPT, /* corruption detected in path */ + XCHK_DIRPATH_LOOP, /* cycle detected further up */ + XCHK_DIRPATH_STALE, /* path is stale */ + XCHK_DIRPATH_OK, /* path reaches the root */ + + XREP_DIRPATH_DELETING, /* path is being deleted */ + XREP_DIRPATH_DELETED, /* path has been deleted */ + XREP_DIRPATH_ADOPTING, /* path is being adopted */ + XREP_DIRPATH_ADOPTED, /* path has been adopted */ +}; + +/* + * Each of these represents one parent pointer path out of the directory being + * scanned. These exist in-core, and hopefully there aren't more than a + * handful of them. + */ +struct xchk_dirpath { + struct list_head list; + + /* Index of the first step in this path. */ + xfarray_idx_t first_step; + + /* Index of the second step in this path. */ + xfarray_idx_t second_step; + + /* Inodes seen while walking this path. */ + struct xino_bitmap seen_inodes; + + /* Number of steps in this path. */ + unsigned int nr_steps; + + /* Which path is this? */ + unsigned int path_nr; + + /* What did we conclude from following this path? */ + enum xchk_dirpath_outcome outcome; +}; + +struct xchk_dirtree_outcomes { + /* Number of XCHK_DIRPATH_DELETE */ + unsigned int bad; + + /* Number of XCHK_DIRPATH_CORRUPT or XCHK_DIRPATH_LOOP */ + unsigned int suspect; + + /* Number of XCHK_DIRPATH_OK */ + unsigned int good; + + /* Directory needs to be added to lost+found */ + bool needs_adoption; +}; + +struct xchk_dirtree { + struct xfs_scrub *sc; + + /* Root inode that we're looking for. */ + xfs_ino_t root_ino; + + /* + * This is the inode that we're scanning. The live update hook can + * continue to be called after xchk_teardown drops sc->ip but before + * it calls buf_cleanup, so we keep a copy. + */ + xfs_ino_t scan_ino; + + /* + * If we start deleting redundant paths to this subdirectory, this is + * the inode number of the surviving parent and the dotdot entry will + * be set to this value. If the value is NULLFSINO, then use @root_ino + * as a stand-in until the orphanage can adopt the subdirectory. + */ + xfs_ino_t parent_ino; + + /* Scratch buffer for scanning pptr xattrs */ + struct xfs_parent_rec pptr_rec; + struct xfs_da_args pptr_args; + + /* Name buffer */ + struct xfs_name xname; + char namebuf[MAXNAMELEN]; + + /* Information for reparenting this directory. */ + struct xrep_adoption adoption; + + /* + * Hook into directory updates so that we can receive live updates + * from other writer threads. + */ + struct xfs_dir_hook dhook; + + /* Parent pointer update arguments. */ + struct xfs_parent_args ppargs; + + /* lock for everything below here */ + struct mutex lock; + + /* buffer for the live update functions to use for dirent names */ + struct xfs_name hook_xname; + unsigned char hook_namebuf[MAXNAMELEN]; + + /* + * All path steps observed during this scan. Each of the path + * steps for a particular pathwalk are recorded in sequential + * order in the xfarray. A pathwalk ends either with a step + * pointing to the root directory (success) or pointing to NULLFSINO + * (loop detected, empty dir detected, etc). + */ + struct xfarray *path_steps; + + /* All names observed during this scan. */ + struct xfblob *path_names; + + /* All paths being tracked by this scanner. */ + struct list_head path_list; + + /* Number of paths in path_list. */ + unsigned int nr_paths; + + /* Number of parents found by a pptr scan. */ + unsigned int parents_found; + + /* Have the path data been invalidated by a concurrent update? */ + bool stale:1; + + /* Has the scan been aborted? */ + bool aborted:1; +}; + +#define xchk_dirtree_for_each_path_safe(dl, path, n) \ + list_for_each_entry_safe((path), (n), &(dl)->path_list, list) + +#define xchk_dirtree_for_each_path(dl, path) \ + list_for_each_entry((path), &(dl)->path_list, list) + +static inline bool +xchk_dirtree_parentless(const struct xchk_dirtree *dl) +{ + struct xfs_scrub *sc = dl->sc; + + if (sc->ip == sc->mp->m_rootip) + return true; + if (VFS_I(sc->ip)->i_nlink == 0) + return true; + return false; +} + +int xchk_dirtree_find_paths_to_root(struct xchk_dirtree *dl); +int xchk_dirpath_append(struct xchk_dirtree *dl, struct xfs_inode *ip, + struct xchk_dirpath *path, const struct xfs_name *name, + const struct xfs_parent_rec *pptr); +void xchk_dirtree_evaluate(struct xchk_dirtree *dl, + struct xchk_dirtree_outcomes *oc); + +#endif /* __XFS_SCRUB_DIRTREE_H__ */ diff --git a/fs/xfs/scrub/dirtree_repair.c b/fs/xfs/scrub/dirtree_repair.c new file mode 100644 index 000000000000..5c04e70ba951 --- /dev/null +++ b/fs/xfs/scrub/dirtree_repair.c @@ -0,0 +1,821 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2023-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_trans_space.h" +#include "xfs_mount.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_attr.h" +#include "xfs_parent.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/bitmap.h" +#include "scrub/ino_bitmap.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/xfblob.h" +#include "scrub/listxattr.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/orphanage.h" +#include "scrub/dirtree.h" +#include "scrub/readdir.h" + +/* + * Directory Tree Structure Repairs + * ================================ + * + * If we decide that the directory being scanned is participating in a + * directory loop, the only change we can make is to remove directory entries + * pointing down to @sc->ip. If that leaves it with no parents, the directory + * should be adopted by the orphanage. + */ + +/* Set up to repair directory loops. */ +int +xrep_setup_dirtree( + struct xfs_scrub *sc) +{ + return xrep_orphanage_try_create(sc); +} + +/* Change the outcome of this path. */ +static inline void +xrep_dirpath_set_outcome( + struct xchk_dirtree *dl, + struct xchk_dirpath *path, + enum xchk_dirpath_outcome outcome) +{ + trace_xrep_dirpath_set_outcome(dl->sc, path->path_nr, path->nr_steps, + outcome); + + path->outcome = outcome; +} + +/* Delete all paths. */ +STATIC void +xrep_dirtree_delete_all_paths( + struct xchk_dirtree *dl, + struct xchk_dirtree_outcomes *oc) +{ + struct xchk_dirpath *path; + + xchk_dirtree_for_each_path(dl, path) { + switch (path->outcome) { + case XCHK_DIRPATH_CORRUPT: + case XCHK_DIRPATH_LOOP: + oc->suspect--; + oc->bad++; + xrep_dirpath_set_outcome(dl, path, XCHK_DIRPATH_DELETE); + break; + case XCHK_DIRPATH_OK: + oc->good--; + oc->bad++; + xrep_dirpath_set_outcome(dl, path, XCHK_DIRPATH_DELETE); + break; + default: + break; + } + } + + ASSERT(oc->suspect == 0); + ASSERT(oc->good == 0); +} + +/* Since this is the surviving path, set the dotdot entry to this value. */ +STATIC void +xrep_dirpath_retain_parent( + struct xchk_dirtree *dl, + struct xchk_dirpath *path) +{ + struct xchk_dirpath_step step; + int error; + + error = xfarray_load(dl->path_steps, path->first_step, &step); + if (error) + return; + + dl->parent_ino = be64_to_cpu(step.pptr_rec.p_ino); +} + +/* Find the one surviving path so we know how to set dotdot. */ +STATIC void +xrep_dirtree_find_surviving_path( + struct xchk_dirtree *dl, + struct xchk_dirtree_outcomes *oc) +{ + struct xchk_dirpath *path; + bool foundit = false; + + xchk_dirtree_for_each_path(dl, path) { + switch (path->outcome) { + case XCHK_DIRPATH_CORRUPT: + case XCHK_DIRPATH_LOOP: + case XCHK_DIRPATH_OK: + if (!foundit) { + xrep_dirpath_retain_parent(dl, path); + foundit = true; + continue; + } + ASSERT(foundit == false); + break; + default: + break; + } + } + + ASSERT(oc->suspect + oc->good == 1); +} + +/* Delete all paths except for the one good one. */ +STATIC void +xrep_dirtree_keep_one_good_path( + struct xchk_dirtree *dl, + struct xchk_dirtree_outcomes *oc) +{ + struct xchk_dirpath *path; + bool foundit = false; + + xchk_dirtree_for_each_path(dl, path) { + switch (path->outcome) { + case XCHK_DIRPATH_CORRUPT: + case XCHK_DIRPATH_LOOP: + oc->suspect--; + oc->bad++; + xrep_dirpath_set_outcome(dl, path, XCHK_DIRPATH_DELETE); + break; + case XCHK_DIRPATH_OK: + if (!foundit) { + xrep_dirpath_retain_parent(dl, path); + foundit = true; + continue; + } + oc->good--; + oc->bad++; + xrep_dirpath_set_outcome(dl, path, XCHK_DIRPATH_DELETE); + break; + default: + break; + } + } + + ASSERT(oc->suspect == 0); + ASSERT(oc->good < 2); +} + +/* Delete all paths except for one suspect one. */ +STATIC void +xrep_dirtree_keep_one_suspect_path( + struct xchk_dirtree *dl, + struct xchk_dirtree_outcomes *oc) +{ + struct xchk_dirpath *path; + bool foundit = false; + + xchk_dirtree_for_each_path(dl, path) { + switch (path->outcome) { + case XCHK_DIRPATH_CORRUPT: + case XCHK_DIRPATH_LOOP: + if (!foundit) { + xrep_dirpath_retain_parent(dl, path); + foundit = true; + continue; + } + oc->suspect--; + oc->bad++; + xrep_dirpath_set_outcome(dl, path, XCHK_DIRPATH_DELETE); + break; + case XCHK_DIRPATH_OK: + ASSERT(0); + break; + default: + break; + } + } + + ASSERT(oc->suspect == 1); + ASSERT(oc->good == 0); +} + +/* + * Figure out what to do with the paths we tried to find. Returns -EDEADLOCK + * if the scan results have become stale. + */ +STATIC void +xrep_dirtree_decide_fate( + struct xchk_dirtree *dl, + struct xchk_dirtree_outcomes *oc) +{ + xchk_dirtree_evaluate(dl, oc); + + /* Parentless directories should not have any paths at all. */ + if (xchk_dirtree_parentless(dl)) { + xrep_dirtree_delete_all_paths(dl, oc); + return; + } + + /* One path is exactly the number of paths we want. */ + if (oc->good + oc->suspect == 1) { + xrep_dirtree_find_surviving_path(dl, oc); + return; + } + + /* Zero paths means we should reattach the subdir to the orphanage. */ + if (oc->good + oc->suspect == 0) { + if (dl->sc->orphanage) + oc->needs_adoption = true; + return; + } + + /* + * Otherwise, this subdirectory has too many parents. If there's at + * least one good path, keep it and delete the others. + */ + if (oc->good > 0) { + xrep_dirtree_keep_one_good_path(dl, oc); + return; + } + + /* + * There are no good paths and there are too many suspect paths. + * Keep the first suspect path and delete the rest. + */ + xrep_dirtree_keep_one_suspect_path(dl, oc); +} + +/* + * Load the first step of this path into @step and @dl->xname/pptr + * for later repair work. + */ +STATIC int +xrep_dirtree_prep_path( + struct xchk_dirtree *dl, + struct xchk_dirpath *path, + struct xchk_dirpath_step *step) +{ + int error; + + error = xfarray_load(dl->path_steps, path->first_step, step); + if (error) + return error; + + error = xfblob_loadname(dl->path_names, step->name_cookie, &dl->xname, + step->name_len); + if (error) + return error; + + dl->pptr_rec = step->pptr_rec; /* struct copy */ + return 0; +} + +/* Delete the VFS dentry for a removed child. */ +STATIC int +xrep_dirtree_purge_dentry( + struct xchk_dirtree *dl, + struct xfs_inode *dp, + const struct xfs_name *name) +{ + struct qstr qname = QSTR_INIT(name->name, name->len); + struct dentry *parent_dentry, *child_dentry; + int error = 0; + + /* + * Find the dentry for the parent directory. If there isn't one, we're + * done. Caller already holds i_rwsem for parent and child. + */ + parent_dentry = d_find_alias(VFS_I(dp)); + if (!parent_dentry) + return 0; + + /* The VFS thinks the parent is a directory, right? */ + if (!d_is_dir(parent_dentry)) { + ASSERT(d_is_dir(parent_dentry)); + error = -EFSCORRUPTED; + goto out_dput_parent; + } + + /* + * Try to find the dirent pointing to the child. If there isn't one, + * we're done. + */ + qname.hash = full_name_hash(parent_dentry, name->name, name->len); + child_dentry = d_lookup(parent_dentry, &qname); + if (!child_dentry) { + error = 0; + goto out_dput_parent; + } + + trace_xrep_dirtree_delete_child(dp->i_mount, child_dentry); + + /* Child is not a directory? We're screwed. */ + if (!d_is_dir(child_dentry)) { + ASSERT(d_is_dir(child_dentry)); + error = -EFSCORRUPTED; + goto out_dput_child; + } + + /* Replace the child dentry with a negative one. */ + d_delete(child_dentry); + +out_dput_child: + dput(child_dentry); +out_dput_parent: + dput(parent_dentry); + return error; +} + +/* + * Prepare to delete a link by taking the IOLOCK of the parent and the child + * (scrub target). Caller must hold IOLOCK_EXCL on @sc->ip. Returns 0 if we + * took both locks, or a negative errno if we couldn't lock the parent in time. + */ +static inline int +xrep_dirtree_unlink_iolock( + struct xfs_scrub *sc, + struct xfs_inode *dp) +{ + int error; + + ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL); + + if (xfs_ilock_nowait(dp, XFS_IOLOCK_EXCL)) + return 0; + + xchk_iunlock(sc, XFS_IOLOCK_EXCL); + do { + xfs_ilock(dp, XFS_IOLOCK_EXCL); + if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL)) + break; + xfs_iunlock(dp, XFS_IOLOCK_EXCL); + + if (xchk_should_terminate(sc, &error)) { + xchk_ilock(sc, XFS_IOLOCK_EXCL); + return error; + } + + delay(1); + } while (1); + + return 0; +} + +/* + * Remove a link from the directory tree and update the dcache. Returns + * -ESTALE if the scan data are now out of date. + */ +STATIC int +xrep_dirtree_unlink( + struct xchk_dirtree *dl, + struct xfs_inode *dp, + struct xchk_dirpath *path, + struct xchk_dirpath_step *step) +{ + struct xfs_scrub *sc = dl->sc; + struct xfs_mount *mp = sc->mp; + xfs_ino_t dotdot_ino; + xfs_ino_t parent_ino = dl->parent_ino; + unsigned int resblks; + int dontcare; + int error; + + /* Take IOLOCK_EXCL of the parent and child. */ + error = xrep_dirtree_unlink_iolock(sc, dp); + if (error) + return error; + + /* + * Create the transaction that we need to sever the path. Ignore + * EDQUOT and ENOSPC being returned via nospace_error because the + * directory code can handle a reservationless update. + */ + resblks = xfs_remove_space_res(mp, step->name_len); + error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, sc->ip, + &resblks, &sc->tp, &dontcare); + if (error) + goto out_iolock; + + /* + * Cancel if someone invalidate the paths while we were trying to get + * the ILOCK. + */ + mutex_lock(&dl->lock); + if (dl->stale) { + mutex_unlock(&dl->lock); + error = -ESTALE; + goto out_trans_cancel; + } + xrep_dirpath_set_outcome(dl, path, XREP_DIRPATH_DELETING); + mutex_unlock(&dl->lock); + + trace_xrep_dirtree_delete_path(dl->sc, sc->ip, path->path_nr, + &dl->xname, &dl->pptr_rec); + + /* + * Decide if we need to reset the dotdot entry. Rules: + * + * - If there's a surviving parent, we want dotdot to point there. + * - If we don't have any surviving parents, then point dotdot at the + * root dir. + * - If dotdot is already set to the value we want, pass in NULLFSINO + * for no change necessary. + * + * Do this /before/ we dirty anything, in case the dotdot lookup + * fails. + */ + error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &dotdot_ino); + if (error) + goto out_trans_cancel; + if (parent_ino == NULLFSINO) + parent_ino = dl->root_ino; + if (dotdot_ino == parent_ino) + parent_ino = NULLFSINO; + + /* Drop the link from sc->ip's dotdot entry. */ + error = xfs_droplink(sc->tp, dp); + if (error) + goto out_trans_cancel; + + /* Reset the dotdot entry to a surviving parent. */ + if (parent_ino != NULLFSINO) { + error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot, + parent_ino, 0); + if (error) + goto out_trans_cancel; + } + + /* Drop the link from dp to sc->ip. */ + error = xfs_droplink(sc->tp, sc->ip); + if (error) + goto out_trans_cancel; + + error = xfs_dir_removename(sc->tp, dp, &dl->xname, sc->ip->i_ino, + resblks); + if (error) { + ASSERT(error != -ENOENT); + goto out_trans_cancel; + } + + if (xfs_has_parent(sc->mp)) { + error = xfs_parent_removename(sc->tp, &dl->ppargs, dp, + &dl->xname, sc->ip); + if (error) + goto out_trans_cancel; + } + + /* + * Notify dirent hooks that we removed the bad link, invalidate the + * dcache, and commit the repair. + */ + xfs_dir_update_hook(dp, sc->ip, -1, &dl->xname); + error = xrep_dirtree_purge_dentry(dl, dp, &dl->xname); + if (error) + goto out_trans_cancel; + + error = xrep_trans_commit(sc); + goto out_ilock; + +out_trans_cancel: + xchk_trans_cancel(sc); +out_ilock: + xfs_iunlock(sc->ip, XFS_ILOCK_EXCL); + xfs_iunlock(dp, XFS_ILOCK_EXCL); +out_iolock: + xfs_iunlock(dp, XFS_IOLOCK_EXCL); + return error; +} + +/* + * Delete a directory entry that points to this directory. Returns -ESTALE + * if the scan data are now out of date. + */ +STATIC int +xrep_dirtree_delete_path( + struct xchk_dirtree *dl, + struct xchk_dirpath *path) +{ + struct xchk_dirpath_step step; + struct xfs_scrub *sc = dl->sc; + struct xfs_inode *dp; + int error; + + /* + * Load the parent pointer and directory inode for this path, then + * drop the scan lock, the ILOCK, and the transaction so that + * _delete_path can reserve the proper transaction. This sets up + * @dl->xname for the deletion. + */ + error = xrep_dirtree_prep_path(dl, path, &step); + if (error) + return error; + + error = xchk_iget(sc, be64_to_cpu(step.pptr_rec.p_ino), &dp); + if (error) + return error; + + mutex_unlock(&dl->lock); + xchk_trans_cancel(sc); + xchk_iunlock(sc, XFS_ILOCK_EXCL); + + /* Delete the directory link and release the parent. */ + error = xrep_dirtree_unlink(dl, dp, path, &step); + xchk_irele(sc, dp); + + /* + * Retake all the resources we had at the beginning even if the repair + * failed or the scan data are now stale. This keeps things simple for + * the caller. + */ + xchk_trans_alloc_empty(sc); + xchk_ilock(sc, XFS_ILOCK_EXCL); + mutex_lock(&dl->lock); + + if (!error && dl->stale) + error = -ESTALE; + return error; +} + +/* Add a new path to represent our in-progress adoption. */ +STATIC int +xrep_dirtree_create_adoption_path( + struct xchk_dirtree *dl) +{ + struct xfs_scrub *sc = dl->sc; + struct xchk_dirpath *path; + int error; + + /* + * We should have capped the number of paths at XFS_MAXLINK-1 in the + * scanner. + */ + if (dl->nr_paths > XFS_MAXLINK) { + ASSERT(dl->nr_paths <= XFS_MAXLINK); + return -EFSCORRUPTED; + } + + /* + * Create a new xchk_path structure to remember this parent pointer + * and record the first name step. + */ + path = kmalloc(sizeof(struct xchk_dirpath), XCHK_GFP_FLAGS); + if (!path) + return -ENOMEM; + + INIT_LIST_HEAD(&path->list); + xino_bitmap_init(&path->seen_inodes); + path->nr_steps = 0; + path->outcome = XREP_DIRPATH_ADOPTING; + + /* + * Record the new link that we just created in the orphanage. Because + * adoption is the last repair that we perform, we don't bother filling + * in the path all the way back to the root. + */ + xfs_inode_to_parent_rec(&dl->pptr_rec, sc->orphanage); + + error = xino_bitmap_set(&path->seen_inodes, sc->orphanage->i_ino); + if (error) + goto out_path; + + trace_xrep_dirtree_create_adoption(sc, sc->ip, dl->nr_paths, + &dl->xname, &dl->pptr_rec); + + error = xchk_dirpath_append(dl, sc->ip, path, &dl->xname, + &dl->pptr_rec); + if (error) + goto out_path; + + path->first_step = xfarray_length(dl->path_steps) - 1; + path->second_step = XFARRAY_NULLIDX; + path->path_nr = dl->nr_paths; + + list_add_tail(&path->list, &dl->path_list); + dl->nr_paths++; + return 0; + +out_path: + kfree(path); + return error; +} + +/* + * Prepare to move a file to the orphanage by taking the IOLOCK of the + * orphanage and the child (scrub target). Caller must hold IOLOCK_EXCL on + * @sc->ip. Returns 0 if we took both locks, or a negative errno if we + * couldn't lock the orphanage in time. + */ +static inline int +xrep_dirtree_adopt_iolock( + struct xfs_scrub *sc) +{ + int error; + + ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL); + + if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) + return 0; + + xchk_iunlock(sc, XFS_IOLOCK_EXCL); + do { + xrep_orphanage_ilock(sc, XFS_IOLOCK_EXCL); + if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL)) + break; + xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL); + + if (xchk_should_terminate(sc, &error)) { + xchk_ilock(sc, XFS_IOLOCK_EXCL); + return error; + } + + delay(1); + } while (1); + + return 0; +} + +/* + * Reattach this orphaned directory to the orphanage. Do not call this with + * any resources held. Returns -ESTALE if the scan data have become out of + * date. + */ +STATIC int +xrep_dirtree_adopt( + struct xchk_dirtree *dl) +{ + struct xfs_scrub *sc = dl->sc; + int error; + + /* Take the IOLOCK of the orphanage and the scrub target. */ + error = xrep_dirtree_adopt_iolock(sc); + if (error) + return error; + + /* + * Set up for an adoption. The directory tree fixer runs after the + * link counts have been corrected. Therefore, we must bump the + * child's link count since there will be no further opportunity to fix + * errors. + */ + error = xrep_adoption_trans_alloc(sc, &dl->adoption); + if (error) + goto out_iolock; + dl->adoption.bump_child_nlink = true; + + /* Figure out what name we're going to use here. */ + error = xrep_adoption_compute_name(&dl->adoption, &dl->xname); + if (error) + goto out_trans; + + /* + * Now that we have a proposed name for the orphanage entry, create + * a faux path so that the live update hook will see it. + */ + mutex_lock(&dl->lock); + if (dl->stale) { + mutex_unlock(&dl->lock); + error = -ESTALE; + goto out_trans; + } + error = xrep_dirtree_create_adoption_path(dl); + mutex_unlock(&dl->lock); + if (error) + goto out_trans; + + /* Reparent the directory. */ + error = xrep_adoption_move(&dl->adoption); + if (error) + goto out_trans; + + /* + * Commit the name and release all inode locks except for the scrub + * target's IOLOCK. + */ + error = xrep_trans_commit(sc); + goto out_ilock; + +out_trans: + xchk_trans_cancel(sc); +out_ilock: + xchk_iunlock(sc, XFS_ILOCK_EXCL); + xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL); +out_iolock: + xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL); + return error; +} + +/* + * This newly orphaned directory needs to be adopted by the orphanage. + * Make this happen. + */ +STATIC int +xrep_dirtree_move_to_orphanage( + struct xchk_dirtree *dl) +{ + struct xfs_scrub *sc = dl->sc; + int error; + + /* + * Start by dropping all the resources that we hold so that we can grab + * all the resources that we need for the adoption. + */ + mutex_unlock(&dl->lock); + xchk_trans_cancel(sc); + xchk_iunlock(sc, XFS_ILOCK_EXCL); + + /* Perform the adoption. */ + error = xrep_dirtree_adopt(dl); + + /* + * Retake all the resources we had at the beginning even if the repair + * failed or the scan data are now stale. This keeps things simple for + * the caller. + */ + xchk_trans_alloc_empty(sc); + xchk_ilock(sc, XFS_ILOCK_EXCL); + mutex_lock(&dl->lock); + + if (!error && dl->stale) + error = -ESTALE; + return error; +} + +/* + * Try to fix all the problems. Returns -ESTALE if the scan data have become + * out of date. + */ +STATIC int +xrep_dirtree_fix_problems( + struct xchk_dirtree *dl, + struct xchk_dirtree_outcomes *oc) +{ + struct xchk_dirpath *path; + int error; + + /* Delete all the paths we don't want. */ + xchk_dirtree_for_each_path(dl, path) { + if (path->outcome != XCHK_DIRPATH_DELETE) + continue; + + error = xrep_dirtree_delete_path(dl, path); + if (error) + return error; + } + + /* Reparent this directory to the orphanage. */ + if (oc->needs_adoption) { + if (xrep_orphanage_can_adopt(dl->sc)) + return xrep_dirtree_move_to_orphanage(dl); + return -EFSCORRUPTED; + } + + return 0; +} + +/* Fix directory loops involving this directory. */ +int +xrep_dirtree( + struct xfs_scrub *sc) +{ + struct xchk_dirtree *dl = sc->buf; + struct xchk_dirtree_outcomes oc; + int error; + + /* + * Prepare to fix the directory tree by retaking the scan lock. The + * order of resource acquisition is still IOLOCK -> transaction -> + * ILOCK -> scan lock. + */ + mutex_lock(&dl->lock); + do { + /* + * Decide what we're going to do, then do it. An -ESTALE + * return here means the scan results are invalid and we have + * to walk again. + */ + if (!dl->stale) { + xrep_dirtree_decide_fate(dl, &oc); + + trace_xrep_dirtree_decided_fate(dl, &oc); + + error = xrep_dirtree_fix_problems(dl, &oc); + if (!error || error != -ESTALE) + break; + } + error = xchk_dirtree_find_paths_to_root(dl); + if (error == -ELNRNG || error == -ENOSR) + error = -EFSCORRUPTED; + } while (!error); + mutex_unlock(&dl->lock); + + return error; +} diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 9020a6bef7f1..b712a8bd34f5 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -108,6 +108,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = { [XFS_SCRUB_TYPE_FSCOUNTERS] = { XHG_FS, XFS_SICK_FS_COUNTERS }, [XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK }, [XFS_SCRUB_TYPE_NLINKS] = { XHG_FS, XFS_SICK_FS_NLINKS }, + [XFS_SCRUB_TYPE_DIRTREE] = { XHG_INO, XFS_SICK_INO_DIRTREE }, }; /* Return the health status mask for this scrub type. */ diff --git a/fs/xfs/scrub/ino_bitmap.h b/fs/xfs/scrub/ino_bitmap.h new file mode 100644 index 000000000000..1300833679ab --- /dev/null +++ b/fs/xfs/scrub/ino_bitmap.h @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2023-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_SCRUB_INO_BITMAP_H__ +#define __XFS_SCRUB_INO_BITMAP_H__ + +/* Bitmaps, but for type-checked for xfs_ino_t */ + +struct xino_bitmap { + struct xbitmap64 inobitmap; +}; + +static inline void xino_bitmap_init(struct xino_bitmap *bitmap) +{ + xbitmap64_init(&bitmap->inobitmap); +} + +static inline void xino_bitmap_destroy(struct xino_bitmap *bitmap) +{ + xbitmap64_destroy(&bitmap->inobitmap); +} + +static inline int xino_bitmap_set(struct xino_bitmap *bitmap, xfs_ino_t ino) +{ + return xbitmap64_set(&bitmap->inobitmap, ino, 1); +} + +static inline int xino_bitmap_test(struct xino_bitmap *bitmap, xfs_ino_t ino) +{ + uint64_t len = 1; + + return xbitmap64_test(&bitmap->inobitmap, ino, &len); +} + +#endif /* __XFS_SCRUB_INO_BITMAP_H__ */ diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index b2f905924d0d..b1c6c60ee1da 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -570,6 +570,12 @@ xrep_adoption_move( xfs_bumplink(sc->tp, sc->orphanage); xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE); + /* Bump the link count of the child. */ + if (adopt->bump_child_nlink) { + xfs_bumplink(sc->tp, sc->ip); + xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); + } + /* Replace the dotdot entry if the child is a subdirectory. */ if (isdir) { error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot, diff --git a/fs/xfs/scrub/orphanage.h b/fs/xfs/scrub/orphanage.h index beb6b686784e..7c7a2e7d81db 100644 --- a/fs/xfs/scrub/orphanage.h +++ b/fs/xfs/scrub/orphanage.h @@ -60,6 +60,14 @@ struct xrep_adoption { /* Block reservations for orphanage and child (if directory). */ unsigned int orphanage_blkres; unsigned int child_blkres; + + /* + * Does the caller want us to bump the child link count? This is not + * needed when reattaching files that have become disconnected but have + * nlink > 1. It is necessary when changing the directory tree + * structure. + */ + bool bump_child_nlink:1; }; bool xrep_orphanage_can_adopt(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 622eb486a16f..0e0dc2bf985c 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -95,6 +95,7 @@ int xrep_setup_directory(struct xfs_scrub *sc); int xrep_setup_parent(struct xfs_scrub *sc); int xrep_setup_nlinks(struct xfs_scrub *sc); int xrep_setup_symlink(struct xfs_scrub *sc, unsigned int *resblks); +int xrep_setup_dirtree(struct xfs_scrub *sc); /* Repair setup functions */ int xrep_setup_ag_allocbt(struct xfs_scrub *sc); @@ -132,6 +133,7 @@ int xrep_xattr(struct xfs_scrub *sc); int xrep_directory(struct xfs_scrub *sc); int xrep_parent(struct xfs_scrub *sc); int xrep_symlink(struct xfs_scrub *sc); +int xrep_dirtree(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xrep_rtbitmap(struct xfs_scrub *sc); @@ -205,6 +207,7 @@ xrep_setup_nothing( #define xrep_setup_directory xrep_setup_nothing #define xrep_setup_parent xrep_setup_nothing #define xrep_setup_nlinks xrep_setup_nothing +#define xrep_setup_dirtree xrep_setup_nothing #define xrep_setup_inode(sc, imap) ((void)0) @@ -239,6 +242,7 @@ static inline int xrep_setup_symlink(struct xfs_scrub *sc, unsigned int *x) #define xrep_directory xrep_notsupported #define xrep_parent xrep_notsupported #define xrep_symlink xrep_notsupported +#define xrep_dirtree xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 7b1f1abdc7a9..e813b66b603a 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -436,6 +436,13 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .scrub = xchk_health_record, .repair = xrep_notsupported, }, + [XFS_SCRUB_TYPE_DIRTREE] = { /* directory tree structure */ + .type = ST_INODE, + .setup = xchk_setup_dirtree, + .scrub = xchk_dirtree, + .has = xfs_has_parent, + .repair = xrep_dirtree, + }, }; static int diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 54a4242bc79c..391027047146 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -185,6 +185,7 @@ int xchk_directory(struct xfs_scrub *sc); int xchk_xattr(struct xfs_scrub *sc); int xchk_symlink(struct xfs_scrub *sc); int xchk_parent(struct xfs_scrub *sc); +int xchk_dirtree(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xchk_rtbitmap(struct xfs_scrub *sc); int xchk_rtsummary(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c index 42cafbed94ac..7996c2335476 100644 --- a/fs/xfs/scrub/stats.c +++ b/fs/xfs/scrub/stats.c @@ -79,6 +79,7 @@ static const char *name_map[XFS_SCRUB_TYPE_NR] = { [XFS_SCRUB_TYPE_FSCOUNTERS] = "fscounters", [XFS_SCRUB_TYPE_QUOTACHECK] = "quotacheck", [XFS_SCRUB_TYPE_NLINKS] = "nlinks", + [XFS_SCRUB_TYPE_DIRTREE] = "dirtree", }; /* Format the scrub stats into a text buffer, similar to pcp style. */ diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 4a8cc2c98d99..4470ad0533b8 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -28,6 +28,10 @@ #include "scrub/orphanage.h" #include "scrub/nlinks.h" #include "scrub/fscounters.h" +#include "scrub/bitmap.h" +#include "scrub/ino_bitmap.h" +#include "scrub/xfblob.h" +#include "scrub/dirtree.h" /* Figure out which block the btree cursor was pointing to. */ static inline xfs_fsblock_t diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index ecfaa4b88910..b3756722bee1 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -27,6 +27,9 @@ struct xchk_nlink; struct xchk_fscounters; struct xfs_rmap_update_params; struct xfs_parent_rec; +enum xchk_dirpath_outcome; +struct xchk_dirtree; +struct xchk_dirtree_outcomes; /* * ftrace's __print_symbolic requires that all enum values be wrapped in the @@ -65,6 +68,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_QUOTACHECK); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_NLINKS); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY); +TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE); #define XFS_SCRUB_TYPE_STRINGS \ { XFS_SCRUB_TYPE_PROBE, "probe" }, \ @@ -94,7 +98,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY); { XFS_SCRUB_TYPE_FSCOUNTERS, "fscounters" }, \ { XFS_SCRUB_TYPE_QUOTACHECK, "quotacheck" }, \ { XFS_SCRUB_TYPE_NLINKS, "nlinks" }, \ - { XFS_SCRUB_TYPE_HEALTHY, "healthy" } + { XFS_SCRUB_TYPE_HEALTHY, "healthy" }, \ + { XFS_SCRUB_TYPE_DIRTREE, "dirtree" } #define XFS_SCRUB_FLAG_STRINGS \ { XFS_SCRUB_IFLAG_REPAIR, "repair" }, \ @@ -171,6 +176,8 @@ DEFINE_EVENT(xchk_class, name, \ DEFINE_SCRUB_EVENT(xchk_start); DEFINE_SCRUB_EVENT(xchk_done); DEFINE_SCRUB_EVENT(xchk_deadlock_retry); +DEFINE_SCRUB_EVENT(xchk_dirtree_start); +DEFINE_SCRUB_EVENT(xchk_dirtree_done); DEFINE_SCRUB_EVENT(xrep_attempt); DEFINE_SCRUB_EVENT(xrep_done); @@ -1576,6 +1583,263 @@ DEFINE_XCHK_PPTR_EVENT(xchk_parent_defer); DEFINE_XCHK_PPTR_EVENT(xchk_parent_slowpath); DEFINE_XCHK_PPTR_EVENT(xchk_parent_ultraslowpath); +DECLARE_EVENT_CLASS(xchk_dirtree_class, + TP_PROTO(struct xfs_scrub *sc, struct xfs_inode *ip, + unsigned int path_nr, const struct xfs_name *name, + const struct xfs_parent_rec *pptr), + TP_ARGS(sc, ip, path_nr, name, pptr), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, path_nr) + __field(xfs_ino_t, child_ino) + __field(unsigned int, child_gen) + __field(xfs_ino_t, parent_ino) + __field(unsigned int, parent_gen) + __field(unsigned int, namelen) + __dynamic_array(char, name, name->len) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->path_nr = path_nr; + __entry->child_ino = ip->i_ino; + __entry->child_gen = VFS_I(ip)->i_generation; + __entry->parent_ino = be64_to_cpu(pptr->p_ino); + __entry->parent_gen = be32_to_cpu(pptr->p_gen); + __entry->namelen = name->len; + memcpy(__get_str(name), name->name, name->len); + ), + TP_printk("dev %d:%d path %u child_ino 0x%llx child_gen 0x%x parent_ino 0x%llx parent_gen 0x%x name '%.*s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->path_nr, + __entry->child_ino, + __entry->child_gen, + __entry->parent_ino, + __entry->parent_gen, + __entry->namelen, + __get_str(name)) +); +#define DEFINE_XCHK_DIRTREE_EVENT(name) \ +DEFINE_EVENT(xchk_dirtree_class, name, \ + TP_PROTO(struct xfs_scrub *sc, struct xfs_inode *ip, \ + unsigned int path_nr, const struct xfs_name *name, \ + const struct xfs_parent_rec *pptr), \ + TP_ARGS(sc, ip, path_nr, name, pptr)) +DEFINE_XCHK_DIRTREE_EVENT(xchk_dirtree_create_path); +DEFINE_XCHK_DIRTREE_EVENT(xchk_dirpath_walk_upwards); + +DECLARE_EVENT_CLASS(xchk_dirpath_class, + TP_PROTO(struct xfs_scrub *sc, struct xfs_inode *ip, + unsigned int path_nr, unsigned int step_nr, + const struct xfs_name *name, + const struct xfs_parent_rec *pptr), + TP_ARGS(sc, ip, path_nr, step_nr, name, pptr), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, path_nr) + __field(unsigned int, step_nr) + __field(xfs_ino_t, child_ino) + __field(unsigned int, child_gen) + __field(xfs_ino_t, parent_ino) + __field(unsigned int, parent_gen) + __field(unsigned int, namelen) + __dynamic_array(char, name, name->len) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->path_nr = path_nr; + __entry->step_nr = step_nr; + __entry->child_ino = ip->i_ino; + __entry->child_gen = VFS_I(ip)->i_generation; + __entry->parent_ino = be64_to_cpu(pptr->p_ino); + __entry->parent_gen = be32_to_cpu(pptr->p_gen); + __entry->namelen = name->len; + memcpy(__get_str(name), name->name, name->len); + ), + TP_printk("dev %d:%d path %u step %u child_ino 0x%llx child_gen 0x%x parent_ino 0x%llx parent_gen 0x%x name '%.*s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->path_nr, + __entry->step_nr, + __entry->child_ino, + __entry->child_gen, + __entry->parent_ino, + __entry->parent_gen, + __entry->namelen, + __get_str(name)) +); +#define DEFINE_XCHK_DIRPATH_EVENT(name) \ +DEFINE_EVENT(xchk_dirpath_class, name, \ + TP_PROTO(struct xfs_scrub *sc, struct xfs_inode *ip, \ + unsigned int path_nr, unsigned int step_nr, \ + const struct xfs_name *name, \ + const struct xfs_parent_rec *pptr), \ + TP_ARGS(sc, ip, path_nr, step_nr, name, pptr)) +DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_disappeared); +DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_badgen); +DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_nondir_parent); +DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_unlinked_parent); +DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_found_next_step); + +TRACE_DEFINE_ENUM(XCHK_DIRPATH_SCANNING); +TRACE_DEFINE_ENUM(XCHK_DIRPATH_DELETE); +TRACE_DEFINE_ENUM(XCHK_DIRPATH_CORRUPT); +TRACE_DEFINE_ENUM(XCHK_DIRPATH_LOOP); +TRACE_DEFINE_ENUM(XCHK_DIRPATH_STALE); +TRACE_DEFINE_ENUM(XCHK_DIRPATH_OK); +TRACE_DEFINE_ENUM(XREP_DIRPATH_DELETING); +TRACE_DEFINE_ENUM(XREP_DIRPATH_DELETED); +TRACE_DEFINE_ENUM(XREP_DIRPATH_ADOPTING); +TRACE_DEFINE_ENUM(XREP_DIRPATH_ADOPTED); + +#define XCHK_DIRPATH_OUTCOME_STRINGS \ + { XCHK_DIRPATH_SCANNING, "scanning" }, \ + { XCHK_DIRPATH_DELETE, "delete" }, \ + { XCHK_DIRPATH_CORRUPT, "corrupt" }, \ + { XCHK_DIRPATH_LOOP, "loop" }, \ + { XCHK_DIRPATH_STALE, "stale" }, \ + { XCHK_DIRPATH_OK, "ok" }, \ + { XREP_DIRPATH_DELETING, "deleting" }, \ + { XREP_DIRPATH_DELETED, "deleted" }, \ + { XREP_DIRPATH_ADOPTING, "adopting" }, \ + { XREP_DIRPATH_ADOPTED, "adopted" } + +DECLARE_EVENT_CLASS(xchk_dirpath_outcome_class, + TP_PROTO(struct xfs_scrub *sc, unsigned long long path_nr, + unsigned int nr_steps, \ + unsigned int outcome), + TP_ARGS(sc, path_nr, nr_steps, outcome), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long long, path_nr) + __field(unsigned int, nr_steps) + __field(unsigned int, outcome) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->path_nr = path_nr; + __entry->nr_steps = nr_steps; + __entry->outcome = outcome; + ), + TP_printk("dev %d:%d path %llu steps %u outcome %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->path_nr, + __entry->nr_steps, + __print_symbolic(__entry->outcome, XCHK_DIRPATH_OUTCOME_STRINGS)) +); +#define DEFINE_XCHK_DIRPATH_OUTCOME_EVENT(name) \ +DEFINE_EVENT(xchk_dirpath_outcome_class, name, \ + TP_PROTO(struct xfs_scrub *sc, unsigned long long path_nr, \ + unsigned int nr_steps, \ + unsigned int outcome), \ + TP_ARGS(sc, path_nr, nr_steps, outcome)) +DEFINE_XCHK_DIRPATH_OUTCOME_EVENT(xchk_dirpath_set_outcome); +DEFINE_XCHK_DIRPATH_OUTCOME_EVENT(xchk_dirpath_evaluate_path); + +DECLARE_EVENT_CLASS(xchk_dirtree_evaluate_class, + TP_PROTO(const struct xchk_dirtree *dl, + const struct xchk_dirtree_outcomes *oc), + TP_ARGS(dl, oc), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_ino_t, rootino) + __field(unsigned int, nr_paths) + __field(unsigned int, bad) + __field(unsigned int, suspect) + __field(unsigned int, good) + __field(bool, needs_adoption) + ), + TP_fast_assign( + __entry->dev = dl->sc->mp->m_super->s_dev; + __entry->ino = dl->sc->ip->i_ino; + __entry->rootino = dl->root_ino; + __entry->nr_paths = dl->nr_paths; + __entry->bad = oc->bad; + __entry->suspect = oc->suspect; + __entry->good = oc->good; + __entry->needs_adoption = oc->needs_adoption ? 1 : 0; + ), + TP_printk("dev %d:%d ino 0x%llx rootino 0x%llx nr_paths %u bad %u suspect %u good %u adopt? %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->rootino, + __entry->nr_paths, + __entry->bad, + __entry->suspect, + __entry->good, + __entry->needs_adoption) +); +#define DEFINE_XCHK_DIRTREE_EVALUATE_EVENT(name) \ +DEFINE_EVENT(xchk_dirtree_evaluate_class, name, \ + TP_PROTO(const struct xchk_dirtree *dl, \ + const struct xchk_dirtree_outcomes *oc), \ + TP_ARGS(dl, oc)) +DEFINE_XCHK_DIRTREE_EVALUATE_EVENT(xchk_dirtree_evaluate); + +TRACE_EVENT(xchk_dirpath_changed, + TP_PROTO(struct xfs_scrub *sc, unsigned int path_nr, + unsigned int step_nr, const struct xfs_inode *dp, + const struct xfs_inode *ip, const struct xfs_name *xname), + TP_ARGS(sc, path_nr, step_nr, dp, ip, xname), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, path_nr) + __field(unsigned int, step_nr) + __field(xfs_ino_t, child_ino) + __field(xfs_ino_t, parent_ino) + __field(unsigned int, namelen) + __dynamic_array(char, name, xname->len) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->path_nr = path_nr; + __entry->step_nr = step_nr; + __entry->child_ino = ip->i_ino; + __entry->parent_ino = dp->i_ino; + __entry->namelen = xname->len; + memcpy(__get_str(name), xname->name, xname->len); + ), + TP_printk("dev %d:%d path %u step %u child_ino 0x%llx parent_ino 0x%llx name '%.*s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->path_nr, + __entry->step_nr, + __entry->child_ino, + __entry->parent_ino, + __entry->namelen, + __get_str(name)) +); + +TRACE_EVENT(xchk_dirtree_live_update, + TP_PROTO(struct xfs_scrub *sc, const struct xfs_inode *dp, + int action, const struct xfs_inode *ip, int delta, + const struct xfs_name *xname), + TP_ARGS(sc, dp, action, ip, delta, xname), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, parent_ino) + __field(int, action) + __field(xfs_ino_t, child_ino) + __field(int, delta) + __field(unsigned int, namelen) + __dynamic_array(char, name, xname->len) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->parent_ino = dp->i_ino; + __entry->action = action; + __entry->child_ino = ip->i_ino; + __entry->delta = delta; + __entry->namelen = xname->len; + memcpy(__get_str(name), xname->name, xname->len); + ), + TP_printk("dev %d:%d parent_ino 0x%llx child_ino 0x%llx nlink_delta %d name '%.*s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->parent_ino, + __entry->child_ino, + __entry->delta, + __entry->namelen, + __get_str(name)) +); + /* repair tracepoints */ #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) @@ -2928,6 +3192,7 @@ DEFINE_REPAIR_DENTRY_EVENT(xrep_adoption_check_child); DEFINE_REPAIR_DENTRY_EVENT(xrep_adoption_check_alias); DEFINE_REPAIR_DENTRY_EVENT(xrep_adoption_check_dentry); DEFINE_REPAIR_DENTRY_EVENT(xrep_adoption_invalidate_child); +DEFINE_REPAIR_DENTRY_EVENT(xrep_dirtree_delete_child); TRACE_EVENT(xrep_symlink_salvage_target, TP_PROTO(struct xfs_inode *ip, char *target, unsigned int targetlen), @@ -3230,6 +3495,11 @@ TRACE_EVENT(xrep_iunlink_commit_bucket, __entry->agino) ); +DEFINE_XCHK_DIRPATH_OUTCOME_EVENT(xrep_dirpath_set_outcome); +DEFINE_XCHK_DIRTREE_EVENT(xrep_dirtree_delete_path); +DEFINE_XCHK_DIRTREE_EVENT(xrep_dirtree_create_adoption); +DEFINE_XCHK_DIRTREE_EVALUATE_EVENT(xrep_dirtree_decided_fate); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ #endif /* _TRACE_XFS_SCRUB_TRACE_H */ diff --git a/fs/xfs/scrub/xfarray.h b/fs/xfs/scrub/xfarray.h index 3b10a58e9f14..8f54c8fc888f 100644 --- a/fs/xfs/scrub/xfarray.h +++ b/fs/xfs/scrub/xfarray.h @@ -8,6 +8,7 @@ /* xfile array index type, along with cursor initialization */ typedef uint64_t xfarray_idx_t; +#define XFARRAY_NULLIDX ((__force xfarray_idx_t)-1ULL) #define XFARRAY_CURSOR_INIT ((__force xfarray_idx_t)0) /* Iterate each index of an xfile array. */ diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index b39f959146bc..10f116d093a2 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -470,6 +470,7 @@ static const struct ioctl_sick_map ino_map[] = { { XFS_SICK_INO_BMBTA_ZAPPED, XFS_BS_SICK_BMBTA }, { XFS_SICK_INO_DIR_ZAPPED, XFS_BS_SICK_DIR }, { XFS_SICK_INO_SYMLINK_ZAPPED, XFS_BS_SICK_SYMLINK }, + { XFS_SICK_INO_DIRTREE, XFS_BS_SICK_DIRTREE }, { 0, 0 }, }; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ebe2ce9bd9ee..58fb7a5062e1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -893,7 +893,7 @@ xfs_init_new_inode( * link count to go to zero, move the inode to AGI unlinked list so that it can * be freed when the last active reference goes away via xfs_inactive(). */ -static int /* error */ +int xfs_droplink( struct xfs_trans *tp, struct xfs_inode *ip) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 04a91e312993..9fd4d29a5713 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -626,6 +626,7 @@ void xfs_end_io(struct work_struct *work); int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2); +int xfs_droplink(struct xfs_trans *tp, struct xfs_inode *ip); void xfs_bumplink(struct xfs_trans *tp, struct xfs_inode *ip); void xfs_lock_inodes(struct xfs_inode **ips, int inodes, uint lock_mode); void xfs_sort_inodes(struct xfs_inode **i_tab, unsigned int num_inodes); |