// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2017 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_log_format.h" #include "xfs_inode.h" #include "xfs_icache.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "scrub/scrub.h" #include "scrub/common.h" /* Set us up to scrub parents. */ int xchk_setup_parent( struct xfs_scrub *sc) { return xchk_setup_inode_contents(sc, 0); } /* Parent pointers */ /* Look for an entry in a parent pointing to this inode. */ struct xchk_parent_ctx { struct dir_context dc; struct xfs_scrub *sc; xfs_ino_t ino; xfs_nlink_t nlink; bool cancelled; }; /* Look for a single entry in a directory pointing to an inode. */ STATIC bool xchk_parent_actor( struct dir_context *dc, const char *name, int namelen, loff_t pos, u64 ino, unsigned type) { struct xchk_parent_ctx *spc; int error = 0; spc = container_of(dc, struct xchk_parent_ctx, dc); if (spc->ino == ino) spc->nlink++; /* * If we're facing a fatal signal, bail out. Store the cancellation * status separately because the VFS readdir code squashes error codes * into short directory reads. */ if (xchk_should_terminate(spc->sc, &error)) spc->cancelled = true; return !error; } /* Count the number of dentries in the parent dir that point to this inode. */ STATIC int xchk_parent_count_parent_dentries( struct xfs_scrub *sc, struct xfs_inode *parent, xfs_nlink_t *nlink) { struct xchk_parent_ctx spc = { .dc.actor = xchk_parent_actor, .ino = sc->ip->i_ino, .sc = sc, }; size_t bufsize; loff_t oldpos; uint lock_mode; int error = 0; /* * If there are any blocks, read-ahead block 0 as we're almost * certain to have the next operation be a read there. This is * how we guarantee that the parent's extent map has been loaded, * if there is one. */ lock_mode = xfs_ilock_data_map_shared(parent); if (parent->i_df.if_nextents > 0) error = xfs_dir3_data_readahead(parent, 0, 0); xfs_iunlock(parent, lock_mode); if (error) return error; /* * Iterate the parent dir to confirm that there is * exactly one entry pointing back to the inode being * scanned. */ bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, parent->i_disk_size); oldpos = 0; while (true) { error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize); if (error) goto out; if (spc.cancelled) { error = -EAGAIN; goto out; } if (oldpos == spc.dc.pos) break; oldpos = spc.dc.pos; } *nlink = spc.nlink; out: return error; } /* * Try to iolock the parent dir @dp in shared mode and the child dir @sc->ip * exclusively. */ STATIC int xchk_parent_lock_two_dirs( struct xfs_scrub *sc, struct xfs_inode *dp) { int error = 0; /* Callers shouldn't do this, but protect ourselves anyway. */ if (dp == sc->ip) { ASSERT(dp != sc->ip); return -EINVAL; } xfs_iunlock(sc->ip, sc->ilock_flags); sc->ilock_flags = 0; while (true) { if (xchk_should_terminate(sc, &error)) return error; /* * Normal XFS takes the IOLOCK before grabbing a transaction. * Scrub holds a transaction, which means that we can't block * on either IOLOCK. */ if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { if (xfs_ilock_nowait(sc->ip, XFS_IOLOCK_EXCL)) { sc->ilock_flags = XFS_IOLOCK_EXCL; break; } xfs_iunlock(dp, XFS_IOLOCK_SHARED); } delay(1); } return 0; } /* * Given the inode number of the alleged parent of the inode being * scrubbed, try to validate that the parent has exactly one directory * entry pointing back to the inode being scrubbed. */ STATIC int xchk_parent_validate( struct xfs_scrub *sc, xfs_ino_t parent_ino) { struct xfs_inode *dp = NULL; xfs_nlink_t expected_nlink; xfs_nlink_t nlink; int error = 0; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return 0; /* '..' must not point to ourselves. */ if (sc->ip->i_ino == parent_ino) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; } /* * If we're an unlinked directory, the parent /won't/ have a link * to us. Otherwise, it should have one link. */ expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; /* * Grab the parent directory inode. This must be released before we * cancel the scrub transaction. * * If _iget returns -EINVAL or -ENOENT then the parent inode number is * garbage and the directory is corrupt. If the _iget returns * -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt which is a * cross referencing error. Any other error is an operational error. */ error = xchk_iget(sc, parent_ino, &dp); if (error == -EINVAL || error == -ENOENT) { error = -EFSCORRUPTED; xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); return error; } if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) return error; if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); goto out_rele; } /* * We prefer to keep the inode locked while we lock and search its * alleged parent for a forward reference. If we can grab the iolock * of the alleged parent, then we can move ahead to counting dirents * and checking nlinks. * * However, if we fail to iolock the alleged parent while holding the * child iolock, we have no way to tell if a blocking lock() would * result in an ABBA deadlock. Release the lock on the child, then * try to lock the alleged parent and trylock the child. */ if (!xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { error = xchk_parent_lock_two_dirs(sc, dp); if (error) goto out_rele; /* * Now that we've locked out updates to the child directory, * re-sample the expected nlink and the '..' dirent. */ expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &parent_ino, NULL); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out_unlock; /* * After relocking the child directory, the '..' entry points * to a different parent than before. This means someone moved * the child elsewhere in the directory tree, which means that * the parent link is now correct and we're done. */ if (parent_ino != dp->i_ino) goto out_unlock; } /* Look for a directory entry in the parent pointing to the child. */ error = xchk_parent_count_parent_dentries(sc, dp, &nlink); if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out_unlock; /* * Ensure that the parent has as many links to the child as the child * thinks it has to the parent. */ if (nlink != expected_nlink) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); out_unlock: xfs_iunlock(dp, XFS_IOLOCK_SHARED); out_rele: xchk_irele(sc, dp); return error; } /* Scrub a parent pointer. */ int xchk_parent( struct xfs_scrub *sc) { struct xfs_mount *mp = sc->mp; xfs_ino_t parent_ino; int error; /* * If we're a directory, check that the '..' link points up to * a directory that has one entry pointing to us. */ if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) return -ENOENT; /* We're not a special inode, are we? */ if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; } /* * The VFS grabs a read or write lock via i_rwsem before it reads * or writes to a directory. If we've gotten this far we've * already obtained IOLOCK_EXCL, which (since 4.10) is the same as * getting a write lock on i_rwsem. Therefore, it is safe for us * to drop the ILOCK here in order to do directory lookups. */ sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); /* Look up '..' */ error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &parent_ino, NULL); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) return error; if (!xfs_verify_dir_ino(mp, parent_ino)) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; } /* Is this the root dir? Then '..' must point to itself. */ if (sc->ip == mp->m_rootip) { if (sc->ip->i_ino != mp->m_sb.sb_rootino || sc->ip->i_ino != parent_ino) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; } return xchk_parent_validate(sc, parent_ino); }