// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2021 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_icache.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_bmap.h" #include "xfs_quota.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" #include "xfs_bmap_util.h" #include "xfs_swapext.h" #include "xfs_xchgrange.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" #include "scrub/repair.h" #include "scrub/tempfile.h" #include "scrub/xfarray.h" #include "scrub/xfblob.h" #include "scrub/parent.h" #include "scrub/orphanage.h" /* * Directory Repair * ================ * * We repair directories by reading the directory leaf blocks looking for * entries, truncate the entire directory fork, and reinsert all the entries. * Unfortunately, there's not yet a secondary copy of directory attribute data, * which means that if we blow up midway through there's little we can do. */ /* Directory entry to be restored in the new directory. */ struct xrep_directory_key { /* Cookie for retrieval of the dirent name. */ xfblob_cookie name_cookie; /* Target inode number. */ xfs_ino_t ino; /* Hash of the dirent name. */ unsigned int hash; /* Length of the dirent name. */ uint8_t namelen; /* File type of the dirent. */ uint8_t ftype; }; struct xrep_dir { struct xfs_scrub *sc; /* Fixed-size array of xrep_directory_key structures. */ struct xfarray *dir_entries; /* Blobs containing directory entry names. */ struct xfblob *dir_names; /* * This is the parent that we're going to set on the reconstructed * directory. */ xfs_ino_t parent_ino; /* nlink value of the corrected directory. */ xfs_nlink_t new_nlink; }; /* Absorb up to 8 pages of dirents before we flush them to the temp dir. */ #define XREP_DIR_SALVAGE_BYTES (PAGE_SIZE * 8) static inline struct xfs_da_args * xrep_directory_da_args( struct xfs_scrub *sc) { return sc->buf; } static inline unsigned char * xrep_directory_namebuf( struct xfs_scrub *sc) { return sc->buf; } static inline struct xrep_orphanage_req * xrep_dir_orphanage_req( struct xfs_scrub *sc) { return sc->buf + MAXNAMELEN + 1; } /* Set up for a directory repair. */ int xrep_setup_directory( struct xfs_scrub *sc) { unsigned int sz; int error; error = xrep_orphanage_try_create(sc); if (error) return error; error = xrep_tempfile_create(sc, S_IFDIR); if (error) return error; /* * We need a buffer to hold a directory entry name while we're building * the new directory, later for the da state when we're freeing the old * directory blocks, and a request to move the directory to the * orphanage. We don't need all three uses at the same time. */ sz = max_t(unsigned int, xrep_orphanage_req_sizeof(), sizeof(struct xfs_da_args)); sc->buf = kvmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL); if (!sc->buf) return -ENOMEM; return 0; } /* * Decide if we want to salvage this entry. We don't bother with oversized * names or the dot entry. */ STATIC int xrep_directory_want_salvage( struct xrep_dir *rd, const char *name, int namelen, xfs_ino_t ino) { struct xfs_mount *mp = rd->sc->mp; /* No pointers to ourselves or to garbage. */ if (ino == rd->sc->ip->i_ino) return false; if (!xfs_verify_dir_ino(mp, ino)) return false; /* No weird looking names or dot entries. */ if (namelen > MAXNAMELEN || namelen <= 0) return false; if (namelen == 1 && name[0] == '.') return false; return true; } /* Allocate an in-core record to hold entries while we rebuild the dir data. */ STATIC int xrep_directory_salvage_entry( struct xrep_dir *rd, unsigned char *name, unsigned int namelen, xfs_ino_t ino) { struct xrep_directory_key key = { .ino = ino, }; struct xfs_inode *ip; unsigned int i; int error = 0; if (xchk_should_terminate(rd->sc, &error)) return error; /* Truncate the name to the first illegal character. */ for (i = 0; i < namelen && name[i] != 0 && name[i] != '/'; i++); key.namelen = i; key.hash = xfs_da_hashname(name, key.namelen); trace_xrep_directory_salvage_entry(rd->sc->ip, name, key.namelen, ino); /* Ignore '..' entries; we already picked the new parent. */ if (key.namelen == 2 && name[0] == '.' && name[1] == '.') { trace_xrep_directory_salvaged_parent(rd->sc->ip, ino); return 0; } /* * Compute the ftype or dump the entry if we can't. We don't lock the * inode because inodes can't change type while we have a reference. */ error = xfs_iget(rd->sc->mp, rd->sc->tp, ino, XFS_IGET_UNTRUSTED, 0, &ip); if (error) return 0; /* Don't mix metadata and regular directory trees. */ if (xfs_is_metadata_inode(ip) ^ xfs_is_metadata_inode(rd->sc->ip)) { xchk_irele(rd->sc, ip); return 0; } key.ftype = xfs_mode_to_ftype(VFS_I(ip)->i_mode); xchk_irele(rd->sc, ip); /* Remember this for later. */ error = xfblob_store(rd->dir_names, &key.name_cookie, name, key.namelen); if (error) return error; return xfarray_append(rd->dir_entries, &key); } /* Record a shortform directory entry for later reinsertion. */ STATIC int xrep_directory_salvage_sf_entry( struct xrep_dir *rd, struct xfs_dir2_sf_hdr *sfp, struct xfs_dir2_sf_entry *sfep) { xfs_ino_t ino; ino = xfs_dir2_sf_get_ino(rd->sc->mp, sfp, sfep); if (!xrep_directory_want_salvage(rd, sfep->name, sfep->namelen, ino)) return 0; return xrep_directory_salvage_entry(rd, sfep->name, sfep->namelen, ino); } /* Record a regular directory entry for later reinsertion. */ STATIC int xrep_directory_salvage_data_entry( struct xrep_dir *rd, struct xfs_dir2_data_entry *dep) { xfs_ino_t ino; ino = be64_to_cpu(dep->inumber); if (!xrep_directory_want_salvage(rd, dep->name, dep->namelen, ino)) return 0; return xrep_directory_salvage_entry(rd, dep->name, dep->namelen, ino); } /* Try to recover block/data format directory entries. */ STATIC int xrep_directory_recover_data( struct xrep_dir *rd, struct xfs_buf *bp) { struct xfs_da_geometry *geo = rd->sc->mp->m_dir_geo; unsigned int offset; unsigned int end; int error; /* error return value */ /* * Loop over the data portion of the block. * Each object is a real entry (dep) or an unused one (dup). */ offset = geo->data_entry_offset; end = min_t(unsigned int, BBTOB(bp->b_length), xfs_dir3_data_end_offset(geo, bp->b_addr)); while (offset < end) { struct xfs_dir2_data_unused *dup = bp->b_addr + offset; struct xfs_dir2_data_entry *dep = bp->b_addr + offset; if (xchk_should_terminate(rd->sc, &error)) break; /* Skip unused entries. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { offset += be16_to_cpu(dup->length); continue; } /* Don't walk off the end of the block. */ offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen); if (offset > end) break; /* Ok, let's save this entry. */ error = xrep_directory_salvage_data_entry(rd, dep); if (error) return error; } return 0; } /* Try to recover shortform directory entries. */ STATIC int xrep_directory_recover_sf( struct xrep_dir *rd) { struct xfs_dir2_sf_hdr *sfp; struct xfs_dir2_sf_entry *sfep; struct xfs_dir2_sf_entry *next; struct xfs_ifork *ifp; xfs_ino_t ino; unsigned char *end; int error; ifp = XFS_IFORK_PTR(rd->sc->ip, XFS_DATA_FORK); sfp = (struct xfs_dir2_sf_hdr *)rd->sc->ip->i_df.if_u1.if_data; end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes; ino = xfs_dir2_sf_get_parent_ino(sfp); trace_xrep_directory_salvaged_parent(rd->sc->ip, ino); sfep = xfs_dir2_sf_firstentry(sfp); while ((unsigned char *)sfep < end) { if (xchk_should_terminate(rd->sc, &error)) break; next = xfs_dir2_sf_nextentry(rd->sc->mp, sfp, sfep); if ((unsigned char *)next > end) break; /* Ok, let's save this entry. */ error = xrep_directory_salvage_sf_entry(rd, sfp, sfep); if (error) return error; sfep = next; } return 0; } /* * Try to figure out the format of this directory from the data fork mappings * and the directory size. If we can be reasonably sure of format, we can be * more aggressive in salvaging directory entries. On return, @magic_guess * will be set to DIR3_BLOCK_MAGIC if we think this is a "block format" * directory; DIR3_DATA_MAGIC if we think this is a "data format" directory, * and 0 if we can't tell. */ STATIC void xrep_directory_guess_format( struct xrep_dir *rd, __be32 *magic_guess) { struct xfs_inode *ip = rd->sc->ip; struct xfs_da_geometry *geo = rd->sc->mp->m_dir_geo; xfs_fileoff_t last; int error; ASSERT(xfs_has_crc(ip->i_mount)); *magic_guess = 0; /* * If there's a single directory block and the directory size is * exactly one block, this has to be a single block format directory. */ error = xfs_bmap_last_offset(ip, &last, XFS_DATA_FORK); if (!error && XFS_FSB_TO_B(ip->i_mount, last) == geo->blksize && ip->i_disk_size == geo->blksize) { *magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); return; } /* * If the last extent before the leaf offset matches the directory * size and the directory size is larger than 1 block, this is a * data format directory. */ last = geo->leafblk; error = xfs_bmap_last_before(rd->sc->tp, ip, &last, XFS_DATA_FORK); if (!error && XFS_FSB_TO_B(ip->i_mount, last) > geo->blksize && XFS_FSB_TO_B(ip->i_mount, last) == ip->i_disk_size) { *magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC); return; } } /* Recover directory entries from a specific directory block. */ STATIC int xrep_directory_recover_dirblock( struct xrep_dir *rd, __be32 magic_guess, xfs_dablk_t dabno) { struct xfs_dir2_data_hdr *hdr; struct xfs_buf *bp; __be32 oldmagic; int error; /* * Try to read buffer. We invalidate them in the next step so we don't * bother to set a buffer type or ops. */ error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno, XFS_DABUF_MAP_HOLE_OK, &bp, XFS_DATA_FORK, NULL); if (error || !bp) return error; hdr = bp->b_addr; oldmagic = hdr->magic; trace_xrep_directory_recover_dirblock(rd->sc->ip, dabno, be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess)); /* * If we're sure of the block's format, proceed with the salvage * operation using the specified magic number. */ if (magic_guess) { hdr->magic = magic_guess; goto recover; } /* * If we couldn't guess what type of directory this is, then we will * only salvage entries from directory blocks that match the magic * number and pass verifiers. */ switch (hdr->magic) { case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): if (!xrep_buf_verify_struct(bp, &xfs_dir3_block_buf_ops)) goto out; break; case cpu_to_be32(XFS_DIR2_DATA_MAGIC): case cpu_to_be32(XFS_DIR3_DATA_MAGIC): if (!xrep_buf_verify_struct(bp, &xfs_dir3_data_buf_ops)) goto out; break; default: goto out; } recover: error = xrep_directory_recover_data(rd, bp); out: hdr->magic = oldmagic; xfs_trans_brelse(rd->sc->tp, bp); return error; } /* Insert one dir entry without cycling locks or transactions. */ STATIC int xrep_directory_insert_rec( struct xrep_dir *rd, const struct xrep_directory_key *key) { struct xfs_name name = { .len = key->namelen, .type = key->ftype, }; char *namebuf; struct xfs_mount *mp = rd->sc->mp; uint resblks; int error; name.name = namebuf = xrep_directory_namebuf(rd->sc); /* * We want to use a separate transaction for each dirent that we're * adding to the temporary directory. However, directory salvaging * uses the scrub transaction to avoid livelocking on directory * tree loops, so we have to commit the existing scrub transaction to * get it out of the way. */ error = xrep_trans_commit(rd->sc); if (error) return error; /* The entry name is stored in the in-core buffer. */ error = xfblob_load(rd->dir_names, key->name_cookie, namebuf, key->namelen); if (error) return error; trace_xrep_directory_insert_rec(rd->sc->tempip, &name, key->ino); error = xfs_qm_dqattach(rd->sc->tempip); if (error) return error; resblks = XFS_LINK_SPACE_RES(mp, key->namelen); error = xchk_trans_alloc(rd->sc, resblks); if (error) return error; /* * Join both inodes to the transaction. We previously took the ILOCK * of both inodes, and we intend to hang on to them no matter what * happens here. */ xfs_trans_ijoin(rd->sc->tp, rd->sc->ip, 0); xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0); error = xfs_dir_createname(rd->sc->tp, rd->sc->tempip, &name, key->ino, resblks); if (error) return error; if (name.type == XFS_DIR3_FT_DIR) rd->new_nlink++; /* Roll both inodes, which takes care of logging both inode cores. */ return xrep_roll_trans(rd->sc); } /* * Periodically flush salvaged directory entries to the temporary file. This * is done to reduce the memory requirements of the directory rebuild, since * directories can contain up to 32GB of directory data. */ STATIC int xrep_directory_flush_salvaged( struct xrep_dir *rd) { struct xrep_directory_key key; uint64_t nr = 0; int error; /* Add all the salvaged entries to the temporary directory. */ while (!(error = xfarray_load_next(rd->dir_entries, &nr, &key))) { error = xrep_directory_insert_rec(rd, &key); if (error) return error; } if (error && error != -ENODATA) return error; /* Empty out both arrays now that we've added the entries. */ xfarray_truncate(rd->dir_entries); xfblob_truncate(rd->dir_names); return 0; } /* Extract as many directory entries as we can. */ STATIC int xrep_directory_recover( struct xrep_dir *rd) { struct xfs_bmbt_irec got; struct xfs_scrub *sc = rd->sc; struct xfs_da_geometry *geo = sc->mp->m_dir_geo; xfs_fileoff_t offset; xfs_dablk_t dabno; __be32 magic_guess; int nmap; int error; xrep_directory_guess_format(rd, &magic_guess); /* Iterate each directory data block in the data fork. */ for (offset = 0; offset < geo->leafblk; offset = got.br_startoff + got.br_blockcount) { nmap = 1; error = xfs_bmapi_read(sc->ip, offset, geo->leafblk - offset, &got, &nmap, 0); if (error) return error; if (nmap != 1) return -EFSCORRUPTED; if (!xfs_bmap_is_written_extent(&got)) continue; for (dabno = round_up(got.br_startoff, geo->fsbcount); dabno < got.br_startoff + got.br_blockcount; dabno += geo->fsbcount) { if (xchk_should_terminate(rd->sc, &error)) return error; error = xrep_directory_recover_dirblock(rd, magic_guess, dabno); if (error) return error; /* Flush dirents to constrain memory usage. */ if (xfarray_bytes(rd->dir_entries) + xfblob_bytes(rd->dir_names) < XREP_DIR_SALVAGE_BYTES) continue; error = xrep_directory_flush_salvaged(rd); if (error) return error; } } return 0; } /* * Find all the directory entries for this inode by scraping them out of the * directory leaf blocks by hand, and flushing them into the temp dir. */ STATIC int xrep_directory_find_entries( struct xrep_dir *rd) { struct xfs_inode *ip = rd->sc->ip; int error; error = xrep_ino_dqattach(rd->sc); if (error) return error; /* Salvage directory entries from the old directory. */ if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { error = xrep_directory_recover_sf(rd); } else { error = xfs_iread_extents(rd->sc->tp, ip, XFS_DATA_FORK); if (error) return error; error = xrep_directory_recover(rd); } if (error) return error; return xrep_directory_flush_salvaged(rd); } /* Invalidate a directory's blocks and unmap them. */ STATIC int xrep_directory_reset_nonlocal( struct xfs_scrub *sc, struct xfs_inode *dp) { struct xfs_iext_cursor icur; struct xfs_bmbt_irec got; struct xfs_ifork *ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); struct xfs_buf *bp; struct xfs_mount *mp = sc->mp; struct xfs_da_geometry *geo = mp->m_dir_geo; xfs_fileoff_t off; /* * Invalidate each directory block. All directory blocks are of * fsbcount length and alignment, so we only need to walk those same * offsets. * * We use TRYLOCK here (recall that we hold the ILOCK of the directory * inode) so that we skip any buffer that's locked on the assumption * that we don't own that block. */ for_each_xfs_iext(ifp, &icur, &got) { for (off = round_up(got.br_startoff, geo->fsbcount); off < got.br_startoff + got.br_blockcount; off += geo->fsbcount) { xfs_fsblock_t fsbno; fsbno = (off - got.br_startoff) + got.br_startblock; bp = xfs_buf_incore(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsbno), XFS_FSB_TO_BB(mp, geo->fsbcount), XBF_TRYLOCK | _XBF_IGNORE_STALE); if (bp) { xfs_buf_stale(bp); xfs_buf_relse(bp); } } } /* Free all the old dir blocks; don't discard them for speed. */ return xfs_bunmapi_range(&sc->tp, dp, XFS_BMAPI_NODISCARD, 0, XFS_MAX_FILEOFF); } /* * Free all the directory blocks and reset the data fork. The caller must * join the inode to the transaction. This function returns with the inode * joined to a clean scrub transaction. */ STATIC int xrep_directory_reset_fork( struct xfs_scrub *sc, xfs_ino_t parent_ino) { struct xfs_ifork *ifp = XFS_IFORK_PTR(sc->tempip, XFS_DATA_FORK); struct xfs_da_args *args = xrep_directory_da_args(sc); int error; /* Unmap all the directory buffers. */ if (xfs_ifork_has_extents(ifp)) { error = xrep_directory_reset_nonlocal(sc, sc->tempip); if (error) return error; } trace_xrep_directory_reset_fork(sc->tempip, parent_ino); /* Reset the data fork to an empty data fork. */ xfs_idestroy_fork(ifp); ifp->if_bytes = 0; sc->tempip->i_disk_size = 0; /* Reinitialize the short form directory. */ args->geo = sc->mp->m_dir_geo; args->dp = sc->tempip; args->trans = sc->tp; error = xfs_dir2_sf_create(args, parent_ino); if (error) return error; return xrep_roll_trans(sc); } /* * Prepare both inodes' directory forks for extent swapping. Promote the * tempfile from short format to leaf format, and if the file being repaired * has a short format attr fork, turn it into an empty extent list. */ STATIC int xrep_directory_swap_prep( struct xfs_scrub *sc, bool temp_local, bool ip_local) { int error; /* * If the tempfile's attributes are in shortform format, convert that * to a single leaf extent so that we can use the atomic extent swap. */ if (temp_local) { struct xfs_da_args args = { .dp = sc->tempip, .geo = sc->mp->m_dir_geo, .whichfork = XFS_DATA_FORK, .trans = sc->tp, .total = 1, }; error = xfs_dir2_sf_to_block(&args); if (error) return error; error = xfs_defer_finish(&sc->tp); if (error) return error; } /* * If the file being repaired had a shortform attribute fork, convert * that to an empty extent list in preparation for the atomic extent * swap. */ if (ip_local) { struct xfs_ifork *ifp; ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK); xfs_idestroy_fork(ifp); ifp->if_format = XFS_DINODE_FMT_EXTENTS; ifp->if_nextents = 0; ifp->if_bytes = 0; ifp->if_u1.if_root = NULL; ifp->if_height = 0; xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE | XFS_ILOG_DDATA); } return 0; } /* * Set the owner for this directory block to the directory being repaired. * Return the magic number that we found, or the usual negative error. */ STATIC int xrep_directory_reset_owner( struct xfs_scrub *sc, xfs_dablk_t dabno, struct xfs_buf *bp, unsigned int *magic) { struct xfs_da_geometry *geo = sc->mp->m_dir_geo; struct xfs_dir3_data_hdr *data3 = bp->b_addr; struct xfs_da3_blkinfo *info3 = bp->b_addr; struct xfs_dir3_free_hdr *free3 = bp->b_addr; struct xfs_dir2_data_entry *dep; /* Directory data blocks. */ if (dabno < geo->leafblk) { *magic = be32_to_cpu(data3->hdr.magic); if (*magic != XFS_DIR3_BLOCK_MAGIC && *magic != XFS_DIR3_DATA_MAGIC) return -EFSCORRUPTED; /* * If this is a block format directory, it's possible that the * block was created as part of converting the temp directory * from short format to block format in order to use the atomic * extent swap. In that case, the '.' entry will be set to * the temp dir, so find the dot entry and reset it. */ if (*magic == XFS_DIR3_BLOCK_MAGIC) { dep = bp->b_addr + geo->data_entry_offset; if (dep->namelen != 1 || dep->name[0] != '.') return -EFSCORRUPTED; dep->inumber = cpu_to_be64(sc->ip->i_ino); } data3->hdr.owner = cpu_to_be64(sc->ip->i_ino); return 0; } /* Directory leaf and da node blocks. */ if (dabno < geo->freeblk) { *magic = be16_to_cpu(info3->hdr.magic); switch (*magic) { case XFS_DA3_NODE_MAGIC: case XFS_DIR3_LEAF1_MAGIC: case XFS_DIR3_LEAFN_MAGIC: break; default: return -EFSCORRUPTED; } info3->owner = cpu_to_be64(sc->ip->i_ino); return 0; } /* Directory free blocks. */ *magic = be32_to_cpu(free3->hdr.magic); if (*magic != XFS_DIR3_FREE_MAGIC) return -EFSCORRUPTED; free3->hdr.owner = cpu_to_be64(sc->ip->i_ino); return 0; } /* * If the buffer didn't have buffer ops set, we need to set them now that we've * dirtied the directory block. */ STATIC void xrep_directory_set_verifier( unsigned int magic, struct xfs_buf *bp) { switch (magic) { case XFS_DIR3_BLOCK_MAGIC: bp->b_ops = &xfs_dir3_block_buf_ops; break; case XFS_DIR3_DATA_MAGIC: bp->b_ops = &xfs_dir3_data_buf_ops; break; case XFS_DA3_NODE_MAGIC: bp->b_ops = &xfs_da3_node_buf_ops; break; case XFS_DIR3_LEAF1_MAGIC: bp->b_ops = &xfs_dir3_leaf1_buf_ops; break; case XFS_DIR3_LEAFN_MAGIC: bp->b_ops = &xfs_dir3_leafn_buf_ops; break; case XFS_DIR3_FREE_MAGIC: bp->b_ops = &xfs_dir3_free_buf_ops; break; } xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); } /* * Change the owner field of every block in the data fork to match the * directory being repaired. */ STATIC int xrep_directory_swap_owner( struct xfs_scrub *sc) { struct xfs_bmbt_irec map; struct xfs_da_geometry *geo = sc->mp->m_dir_geo; struct xfs_buf *bp; xfs_fileoff_t offset = 0; xfs_fileoff_t end = XFS_MAX_FILEOFF; xfs_dablk_t dabno; int nmap; int error; for (offset = 0; offset < end; offset = map.br_startoff + map.br_blockcount) { nmap = 1; error = xfs_bmapi_read(sc->tempip, offset, end - offset, &map, &nmap, 0); if (error) return error; if (nmap != 1) return -EFSCORRUPTED; if (!xfs_bmap_is_written_extent(&map)) continue; for (dabno = round_up(map.br_startoff, geo->fsbcount); dabno < map.br_startoff + map.br_blockcount; dabno += geo->fsbcount) { unsigned int magic; error = xfs_da_read_buf(sc->tp, sc->tempip, dabno, 0, &bp, XFS_DATA_FORK, NULL); if (error) return error; if (!bp) return -EFSCORRUPTED; error = xrep_directory_reset_owner(sc, dabno, bp, &magic); if (error) { xfs_trans_brelse(sc->tp, bp); return error; } if (bp->b_ops == NULL) xrep_directory_set_verifier(magic, bp); xfs_trans_ordered_buf(sc->tp, bp); xfs_trans_brelse(sc->tp, bp); } } return 0; } /* * If both files' directory structure are in short format, we can copy * the short format data from the tempfile to the repaired file if it'll * fit. */ STATIC void xrep_directory_swap_local( struct xfs_scrub *sc, int newsize) { struct xfs_ifork *ifp1, *ifp2; ifp1 = XFS_IFORK_PTR(sc->tempip, XFS_DATA_FORK); ifp2 = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK); xfs_idata_realloc(sc->ip, ifp2->if_bytes - ifp1->if_bytes, XFS_DATA_FORK); memcpy(ifp2->if_u1.if_data, ifp1->if_u1.if_data, newsize); xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE | XFS_ILOG_DDATA); } static struct xfs_name xfs_name_dot = { .name = (unsigned char *)".", .len = 1, .type = XFS_DIR3_FT_DIR, }; /* Swap the temporary directory's data fork with the one being repaired. */ STATIC int xrep_directory_swap( struct xrep_dir *rd) { struct xfs_swapext_req req; struct xfs_swapext_res res; struct xfs_scrub *sc = rd->sc; bool ip_local, temp_local; int error; error = xrep_tempfile_swapext_prep(sc, XFS_DATA_FORK, &req, &res); if (error) return error; error = xchk_trans_alloc(sc, res.resblks); if (error) return error; sc->temp_ilock_flags |= XFS_ILOCK_EXCL; sc->ilock_flags |= XFS_ILOCK_EXCL; xfs_xchg_range_ilock(sc->tp, sc->ip, sc->tempip); /* * Reset the temporary directory's '.' entry to point to the directory * we're repairing. Note: shortform directories lack the dot entry. * * It's possible that this replacement could also expand a sf tempdir * into block format. */ if (sc->tempip->i_df.if_format != XFS_DINODE_FMT_LOCAL) { error = xfs_dir_replace(sc->tp, sc->tempip, &xfs_name_dot, sc->ip->i_ino, res.resblks); if (error) return error; } /* * Reset the temporary directory's '..' entry to point to the parent * that we found. The temporary directory was created with the root * directory as the parent, so we can skip this if repairing a * subdirectory of the root. * * It's also possible that this replacement could also expand a sf * tempdir into block format. */ if (rd->parent_ino != sc->mp->m_rootip->i_ino) { error = xfs_dir_replace(sc->tp, rd->sc->tempip, &xfs_name_dotdot, rd->parent_ino, res.resblks); if (error) return error; } /* * Changing the dot and dotdot entries could have changed the shape of * the directory, so we recompute these. */ ip_local = sc->ip->i_df.if_format == XFS_DINODE_FMT_LOCAL; temp_local = sc->tempip->i_df.if_format == XFS_DINODE_FMT_LOCAL; /* * If the both files have a local format data fork and the rebuilt * directory data would fit in the repaired file's data fork, copy * the contents from the tempfile and declare ourselves done. */ if (ip_local && temp_local) { if (sc->tempip->i_disk_size <= XFS_IFORK_DSIZE(sc->ip)) { xrep_directory_swap_local(sc, sc->tempip->i_disk_size); set_nlink(VFS_I(sc->ip), rd->new_nlink); return 0; } } /* Clean the transaction before we start working on the extent swap. */ error = xrep_roll_trans(rd->sc); if (error) return error; /* Otherwise, make sure both data forks are in block-mapping mode. */ error = xrep_directory_swap_prep(sc, temp_local, ip_local); if (error) return error; /* Rewrite the owner field of all attr blocks in the temporary file. */ error = xrep_directory_swap_owner(sc); if (error) return error; /* * Set nlink of the directory under repair to the number of * subdirectories that will be in the new directory data. Do this in * the same transaction sequence that (atomically) commits the new * data. */ set_nlink(VFS_I(sc->ip), rd->new_nlink); return xfs_swapext(&sc->tp, &req); } /* * Insert all the attributes that we collected. * * Commit the repair transaction and drop the ilock because the attribute * setting code needs to be able to allocate special transactions and take the * ilock on its own. Some day we'll have deferred attribute setting, at which * point we'll be able to use that to replace the attributes atomically and * safely. */ STATIC int xrep_directory_rebuild_tree( struct xrep_dir *rd) { int error; trace_xrep_directory_rebuild_tree(rd->sc->ip, rd->parent_ino); /* * Swap the tempdir's data fork with the file being repaired. This * recreates the transaction and re-takes the ILOCK in the scrub * context. */ error = xrep_directory_swap(rd); if (error) return error; /* * Release the old directory blocks and reset the data fork of the temp * directory to an empty shortform directory because inactivation does * nothing for directories. */ return xrep_directory_reset_fork(rd->sc, rd->sc->mp->m_rootip->i_ino); } /* * Try to find the parent of the directory being repaired. * * If this is a directory tree root or an unlinked directory, we already know * the answer. Otherwise, try to look it up in the directory; if we get a * reasonable value, we'll use that. * * NOTE: This function will someday be augmented by the directory parent repair * code, which will know how to check the parent and scan the filesystem if * we cannot find anything. Inode scans will have to be done before we start * salvaging directory entries, so we do this now. */ STATIC int xrep_directory_find_parent( struct xrep_dir *rd, bool *move_orphanage) { struct xfs_scrub *sc = rd->sc; xfs_ino_t parent_ino; int error; /* * If we're the root directory, we are our own parent. If we're an * unlinked directory, the parent /won't/ have a link to us. Set the * parent directory to the root for both cases. */ if (rd->sc->ip->i_ino == sc->mp->m_sb.sb_rootino || VFS_I(rd->sc->ip)->i_nlink == 0) { rd->parent_ino = sc->mp->m_sb.sb_rootino; return 0; } /* Does the VFS dcache have an answer for us? */ parent_ino = xrep_parent_from_dcache(sc); error = xrep_parent_confirm(sc, &parent_ino); if (!error && parent_ino != NULLFSINO) goto foundit; /* * Try to look up '..'; if it seems plausible, go with it. Check that * the parent directory actually points to this directory. If so, we * are good to go. Any errors just push us to scanning the fs. */ parent_ino = xrep_dotdot_lookup(sc); error = xrep_parent_confirm(sc, &parent_ino); if (!error && parent_ino != NULLFSINO) goto foundit; /* Otherwise, scan the entire filesystem. */ error = xrep_parent_scan(sc, &parent_ino); if (error) return error; if (parent_ino == NULLFSINO) { /* * Temporarily assign the root dir as the parent; we'll move * this to the orphanage after swapping the dir contents. */ *move_orphanage = true; rd->parent_ino = sc->mp->m_sb.sb_rootino; return 0; } foundit: rd->parent_ino = parent_ino; return 0; } /* * Move the current file to the orphanage. Caller must not hold any inode * locks. Upon return, the scrub state will reflect the transaction, ijoin, * and inode lock states. */ STATIC int xrep_dir_move_to_orphanage( struct xfs_scrub *sc) { struct xrep_orphanage_req *orph = xrep_dir_orphanage_req(sc); unsigned char *namebuf = xrep_directory_namebuf(sc); int error; /* No orphanage? We can't fix this. */ if (!sc->orphanage) return -EFSCORRUPTED; /* * If we can take the orphanage's iolock then we're ready to move. * * If we can't, release the iolock on the child, and then try to iolock * the orphanage and child at the same time. Use trylock for the * second lock so that we don't ABBA deadlock the system. */ if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) { xfs_ino_t orig_parent, new_parent; orig_parent = xrep_dotdot_lookup(sc); xchk_iunlock(sc, sc->ilock_flags); error = xrep_orphanage_iolock_two(sc); if (error) return error; /* * If the parent changed or the child was unlinked while the * child directory was unlocked, we don't need to move the * child to the orphanage after all. */ new_parent = xrep_dotdot_lookup(sc); if (orig_parent != new_parent || VFS_I(sc->ip)->i_nlink == 0) return 0; } /* * Move the directory to the orphanage, and let scrub teardown unlock * everything for us. */ xrep_orphanage_compute_blkres(sc, orph); error = xrep_orphanage_compute_name(orph, namebuf); if (error) return error; error = xfs_trans_reserve_more(sc->tp, orph->orphanage_blkres + orph->child_blkres, 0); if (error) return error; error = xrep_orphanage_ilock_resv_quota(orph); if (error) return error; return xrep_orphanage_adopt(orph); } /* * Repair the directory metadata. * * XXX: Directory entry buffers can be multiple fsblocks in size. The buffer * cache in XFS can't handle aliased multiblock buffers, so this might * misbehave if the directory blocks are crosslinked with other filesystem * metadata. * * XXX: Is it necessary to check the dcache for this directory to make sure * that we always recreate every cached entry? */ int xrep_directory( struct xfs_scrub *sc) { struct xrep_dir rd = { .sc = sc, .parent_ino = NULLFSINO, .new_nlink = 2, }; bool move_orphanage = false; int error; /* Set up some storage */ rd.dir_entries = xfarray_create("dir entries", sizeof(struct xrep_directory_key)); if (IS_ERR(rd.dir_entries)) return PTR_ERR(rd.dir_entries); rd.dir_names = xfblob_create("dir names"); if (IS_ERR(rd.dir_names)) { error = PTR_ERR(rd.dir_names); goto out_arr; } /* * Drop the ILOCK and MMAPLOCK on this directory; we don't need to * hold these to maintain control over the directory we're fixing. * This should leave us holding only IOLOCK_EXCL. */ if (sc->ilock_flags & XFS_ILOCK_EXCL) xchk_iunlock(sc, XFS_ILOCK_EXCL); xchk_iunlock(sc, XFS_MMAPLOCK_EXCL); /* Figure out who is going to be the parent of this directory. */ error = xrep_directory_find_parent(&rd, &move_orphanage); if (error) goto out_names; /* * Grab ILOCK_EXCL on both the directory and the tempdir so that we can * salvage directory entries into the tempdir. */ xrep_tempfile_ilock_two(sc, XFS_ILOCK_EXCL); /* Collect directory entries by parsing raw leaf blocks. */ error = xrep_directory_find_entries(&rd); if (error) goto out_names; /* * Now that we've stuffed all the salvaged dirents in the temporary * dir, drop the in-memory staging areas, cancel the scrub transaction, * and drop both ILOCKs. The extent swap that we use to commit the new * directory requires special transactions and locking. */ xfblob_destroy(rd.dir_names); xfarray_destroy(rd.dir_entries); rd.dir_names = NULL; rd.dir_entries = NULL; xchk_trans_cancel(sc); xchk_iunlock(sc, XFS_ILOCK_EXCL); xrep_tempfile_iunlock(sc, XFS_ILOCK_EXCL); error = xrep_directory_rebuild_tree(&rd); if (error || !move_orphanage) return error; /* * We hold ILOCK_EXCL on both the directory and the tempdir after a * successful rebuild. Before we can move the directory to the * orphanage, we must roll to a clean unjoined transaction and drop the * ILOCKs on the dir and the temp dir. We still hold IOLOCK_EXCL on * the dir, so nobody will be able to access it in the mean time. */ error = xfs_trans_roll(&sc->tp); if (error) return error; xchk_iunlock(sc, XFS_ILOCK_EXCL); xrep_tempfile_iunlock(sc, XFS_ILOCK_EXCL); return xrep_dir_move_to_orphanage(sc); out_names: xfblob_destroy(rd.dir_names); out_arr: xfarray_destroy(rd.dir_entries); return error; }